btrfs: remove crc_check logic from free space
fs/btrfs/free-space-cache.c
c1d7c514 1// SPDX-License-Identifier: GPL-2.0
0f9dd46c
JB
2/*
3 * Copyright (C) 2008 Red Hat. All rights reserved.
0f9dd46c
JB
4 */
5
96303081 6#include <linux/pagemap.h>
0f9dd46c 7#include <linux/sched.h>
f361bf4a 8#include <linux/sched/signal.h>
5a0e3ad6 9#include <linux/slab.h>
96303081 10#include <linux/math64.h>
6ab60601 11#include <linux/ratelimit.h>
540adea3 12#include <linux/error-injection.h>
84de76a2 13#include <linux/sched/mm.h>
0f9dd46c 14#include "ctree.h"
fa9c0d79
CM
15#include "free-space-cache.h"
16#include "transaction.h"
0af3d00b 17#include "disk-io.h"
43be2146 18#include "extent_io.h"
04216820 19#include "volumes.h"
8719aaae 20#include "space-info.h"
86736342 21#include "delalloc-space.h"
aac0023c 22#include "block-group.h"
b0643e59 23#include "discard.h"
fa9c0d79 24
0ef6447a 25#define BITS_PER_BITMAP (PAGE_SIZE * 8UL)
5d90c5c7
DZ
26#define MAX_CACHE_BYTES_PER_GIG SZ_64K
27#define FORCE_EXTENT_THRESHOLD SZ_1M
0f9dd46c 28
55507ce3
FM
29struct btrfs_trim_range {
30 u64 start;
31 u64 bytes;
32 struct list_head list;
33};
34
34d52cb6 35static int link_free_space(struct btrfs_free_space_ctl *ctl,
0cb59c99 36 struct btrfs_free_space *info);
cd023e7b
JB
37static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
38 struct btrfs_free_space *info);
cd79909b
JB
39static int search_bitmap(struct btrfs_free_space_ctl *ctl,
40 struct btrfs_free_space *bitmap_info, u64 *offset,
41 u64 *bytes, bool for_alloc);
42static void free_bitmap(struct btrfs_free_space_ctl *ctl,
43 struct btrfs_free_space *bitmap_info);
44static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
45 struct btrfs_free_space *info, u64 offset,
46 u64 bytes);
0cb59c99 47
0414efae
LZ
48static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
49 struct btrfs_path *path,
50 u64 offset)
0af3d00b 51{
0b246afa 52 struct btrfs_fs_info *fs_info = root->fs_info;
0af3d00b
JB
53 struct btrfs_key key;
54 struct btrfs_key location;
55 struct btrfs_disk_key disk_key;
56 struct btrfs_free_space_header *header;
57 struct extent_buffer *leaf;
58 struct inode *inode = NULL;
84de76a2 59 unsigned nofs_flag;
0af3d00b
JB
60 int ret;
61
0af3d00b 62 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
0414efae 63 key.offset = offset;
0af3d00b
JB
64 key.type = 0;
65
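	/*
	 * Illustrative example (hypothetical values): the cache item for a
	 * block group starting at 1GiB is keyed as
	 *   (BTRFS_FREE_SPACE_OBJECTID, 0, 1073741824)
	 * in the tree root, and its btrfs_free_space_header in turn carries
	 * the location key of the hidden inode that backs the cache file.
	 */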
66 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
67 if (ret < 0)
68 return ERR_PTR(ret);
69 if (ret > 0) {
b3b4aa74 70 btrfs_release_path(path);
0af3d00b
JB
71 return ERR_PTR(-ENOENT);
72 }
73
74 leaf = path->nodes[0];
75 header = btrfs_item_ptr(leaf, path->slots[0],
76 struct btrfs_free_space_header);
77 btrfs_free_space_key(leaf, header, &disk_key);
78 btrfs_disk_key_to_cpu(&location, &disk_key);
b3b4aa74 79 btrfs_release_path(path);
0af3d00b 80
84de76a2
JB
81 /*
82 * We are often under a trans handle at this point, so we need to make
83 * sure NOFS is set to keep us from deadlocking.
84 */
85 nofs_flag = memalloc_nofs_save();
0202e83f 86 inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path);
4222ea71 87 btrfs_release_path(path);
84de76a2 88 memalloc_nofs_restore(nofs_flag);
0af3d00b
JB
89 if (IS_ERR(inode))
90 return inode;
0af3d00b 91
528c0327 92 mapping_set_gfp_mask(inode->i_mapping,
c62d2555
MH
93 mapping_gfp_constraint(inode->i_mapping,
94 ~(__GFP_FS | __GFP_HIGHMEM)));
adae52b9 95
0414efae
LZ
96 return inode;
97}
98
32da5386 99struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
7949f339 100 struct btrfs_path *path)
0414efae 101{
7949f339 102 struct btrfs_fs_info *fs_info = block_group->fs_info;
0414efae 103 struct inode *inode = NULL;
5b0e95bf 104 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
0414efae
LZ
105
106 spin_lock(&block_group->lock);
107 if (block_group->inode)
108 inode = igrab(block_group->inode);
109 spin_unlock(&block_group->lock);
110 if (inode)
111 return inode;
112
77ab86bf 113 inode = __lookup_free_space_inode(fs_info->tree_root, path,
b3470b5d 114 block_group->start);
0414efae
LZ
115 if (IS_ERR(inode))
116 return inode;
117
0af3d00b 118 spin_lock(&block_group->lock);
5b0e95bf 119 if (!((BTRFS_I(inode)->flags & flags) == flags)) {
0b246afa 120 btrfs_info(fs_info, "Old style space inode found, converting.");
5b0e95bf
JB
121 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
122 BTRFS_INODE_NODATACOW;
2f356126
JB
123 block_group->disk_cache_state = BTRFS_DC_CLEAR;
124 }
125
300e4f8a 126 if (!block_group->iref) {
0af3d00b
JB
127 block_group->inode = igrab(inode);
128 block_group->iref = 1;
129 }
130 spin_unlock(&block_group->lock);
131
132 return inode;
133}
134
48a3b636
ES
135static int __create_free_space_inode(struct btrfs_root *root,
136 struct btrfs_trans_handle *trans,
137 struct btrfs_path *path,
138 u64 ino, u64 offset)
0af3d00b
JB
139{
140 struct btrfs_key key;
141 struct btrfs_disk_key disk_key;
142 struct btrfs_free_space_header *header;
143 struct btrfs_inode_item *inode_item;
144 struct extent_buffer *leaf;
5b0e95bf 145 u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
0af3d00b
JB
146 int ret;
147
0414efae 148 ret = btrfs_insert_empty_inode(trans, root, path, ino);
0af3d00b
JB
149 if (ret)
150 return ret;
151
5b0e95bf
JB
152 /* We inline crc's for the free disk space cache */
153 if (ino != BTRFS_FREE_INO_OBJECTID)
154 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
155
0af3d00b
JB
156 leaf = path->nodes[0];
157 inode_item = btrfs_item_ptr(leaf, path->slots[0],
158 struct btrfs_inode_item);
159 btrfs_item_key(leaf, &disk_key, path->slots[0]);
b159fa28 160 memzero_extent_buffer(leaf, (unsigned long)inode_item,
0af3d00b
JB
161 sizeof(*inode_item));
162 btrfs_set_inode_generation(leaf, inode_item, trans->transid);
163 btrfs_set_inode_size(leaf, inode_item, 0);
164 btrfs_set_inode_nbytes(leaf, inode_item, 0);
165 btrfs_set_inode_uid(leaf, inode_item, 0);
166 btrfs_set_inode_gid(leaf, inode_item, 0);
167 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
5b0e95bf 168 btrfs_set_inode_flags(leaf, inode_item, flags);
0af3d00b
JB
169 btrfs_set_inode_nlink(leaf, inode_item, 1);
170 btrfs_set_inode_transid(leaf, inode_item, trans->transid);
0414efae 171 btrfs_set_inode_block_group(leaf, inode_item, offset);
0af3d00b 172 btrfs_mark_buffer_dirty(leaf);
b3b4aa74 173 btrfs_release_path(path);
0af3d00b
JB
174
175 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
0414efae 176 key.offset = offset;
0af3d00b 177 key.type = 0;
0af3d00b
JB
178 ret = btrfs_insert_empty_item(trans, root, path, &key,
179 sizeof(struct btrfs_free_space_header));
180 if (ret < 0) {
b3b4aa74 181 btrfs_release_path(path);
0af3d00b
JB
182 return ret;
183 }
c9dc4c65 184
0af3d00b
JB
185 leaf = path->nodes[0];
186 header = btrfs_item_ptr(leaf, path->slots[0],
187 struct btrfs_free_space_header);
b159fa28 188 memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header));
0af3d00b
JB
189 btrfs_set_free_space_key(leaf, header, &disk_key);
190 btrfs_mark_buffer_dirty(leaf);
b3b4aa74 191 btrfs_release_path(path);
0af3d00b
JB
192
193 return 0;
194}
195
4ca75f1b 196int create_free_space_inode(struct btrfs_trans_handle *trans,
32da5386 197 struct btrfs_block_group *block_group,
0414efae
LZ
198 struct btrfs_path *path)
199{
200 int ret;
201 u64 ino;
202
4ca75f1b 203 ret = btrfs_find_free_objectid(trans->fs_info->tree_root, &ino);
0414efae
LZ
204 if (ret < 0)
205 return ret;
206
4ca75f1b 207 return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
b3470b5d 208 ino, block_group->start);
0414efae
LZ
209}
210
2ff7e61e 211int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
7b61cd92 212 struct btrfs_block_rsv *rsv)
0af3d00b 213{
c8174313 214 u64 needed_bytes;
7b61cd92 215 int ret;
c8174313
JB
216
217 /* 1 for slack space, 1 for updating the inode */
2bd36e7b
JB
218 needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
219 btrfs_calc_metadata_size(fs_info, 1);
c8174313 220
7b61cd92
MX
221 spin_lock(&rsv->lock);
222 if (rsv->reserved < needed_bytes)
223 ret = -ENOSPC;
224 else
225 ret = 0;
226 spin_unlock(&rsv->lock);
4b286cd1 227 return ret;
7b61cd92
MX
228}
229
77ab86bf 230int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
32da5386 231 struct btrfs_block_group *block_group,
7b61cd92
MX
232 struct inode *inode)
233{
77ab86bf 234 struct btrfs_root *root = BTRFS_I(inode)->root;
7b61cd92 235 int ret = 0;
35c76642 236 bool locked = false;
1bbc621e 237
1bbc621e 238 if (block_group) {
21e75ffe
JM
239 struct btrfs_path *path = btrfs_alloc_path();
240
241 if (!path) {
242 ret = -ENOMEM;
243 goto fail;
244 }
35c76642 245 locked = true;
1bbc621e
CM
246 mutex_lock(&trans->transaction->cache_write_mutex);
247 if (!list_empty(&block_group->io_list)) {
248 list_del_init(&block_group->io_list);
249
afdb5718 250 btrfs_wait_cache_io(trans, block_group, path);
1bbc621e
CM
251 btrfs_put_block_group(block_group);
252 }
253
254 /*
 255		 * now that we've truncated the cache away, it's no longer
 256		 * set up or written
257 */
258 spin_lock(&block_group->lock);
259 block_group->disk_cache_state = BTRFS_DC_CLEAR;
260 spin_unlock(&block_group->lock);
21e75ffe 261 btrfs_free_path(path);
1bbc621e 262 }
0af3d00b 263
6ef06d27 264 btrfs_i_size_write(BTRFS_I(inode), 0);
7caef267 265 truncate_pagecache(inode, 0);
0af3d00b
JB
266
267 /*
f7e9e8fc
OS
268 * We skip the throttling logic for free space cache inodes, so we don't
269 * need to check for -EAGAIN.
0af3d00b 270 */
50743398 271 ret = btrfs_truncate_inode_items(trans, root, BTRFS_I(inode),
0af3d00b 272 0, BTRFS_EXTENT_DATA_KEY);
35c76642
FM
273 if (ret)
274 goto fail;
0af3d00b 275
9a56fcd1 276 ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
1bbc621e 277
1bbc621e 278fail:
35c76642
FM
279 if (locked)
280 mutex_unlock(&trans->transaction->cache_write_mutex);
79787eaa 281 if (ret)
66642832 282 btrfs_abort_transaction(trans, ret);
c8174313 283
82d5902d 284 return ret;
0af3d00b
JB
285}
286
1d480538 287static void readahead_cache(struct inode *inode)
9d66e233
JB
288{
289 struct file_ra_state *ra;
290 unsigned long last_index;
291
292 ra = kzalloc(sizeof(*ra), GFP_NOFS);
293 if (!ra)
1d480538 294 return;
9d66e233
JB
295
296 file_ra_state_init(ra, inode->i_mapping);
09cbfeaf 297 last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
9d66e233
JB
298
299 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
300
301 kfree(ra);
9d66e233
JB
302}
303
4c6d1d85 304static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
f15376df 305 int write)
a67509c3 306{
5349d6c3 307 int num_pages;
5349d6c3 308
09cbfeaf 309 num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
5349d6c3 310
8f6c72a9 311 /* Make sure we can fit our crcs and generation into the first page */
7dbdb443 312 if (write && (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE)
5349d6c3
MX
313 return -ENOSPC;
314
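	/*
	 * Example of the check above (assuming 4KiB pages): page 0 must hold
	 * one u32 crc per page plus the u64 generation, so at most
	 * (4096 - 8) / 4 = 1022 pages are allowed, i.e. a cache file of
	 * roughly 4MiB.
	 */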
4c6d1d85 315 memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));
5349d6c3 316
31e818fe 317 io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS);
a67509c3
JB
318 if (!io_ctl->pages)
319 return -ENOMEM;
5349d6c3
MX
320
321 io_ctl->num_pages = num_pages;
f15376df 322 io_ctl->fs_info = btrfs_sb(inode->i_sb);
c9dc4c65 323 io_ctl->inode = inode;
5349d6c3 324
a67509c3
JB
325 return 0;
326}
663faf9f 327ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO);
a67509c3 328
4c6d1d85 329static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
a67509c3
JB
330{
331 kfree(io_ctl->pages);
c9dc4c65 332 io_ctl->pages = NULL;
a67509c3
JB
333}
334
4c6d1d85 335static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl)
a67509c3
JB
336{
337 if (io_ctl->cur) {
a67509c3
JB
338 io_ctl->cur = NULL;
339 io_ctl->orig = NULL;
340 }
341}
342
4c6d1d85 343static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
a67509c3 344{
b12d6869 345 ASSERT(io_ctl->index < io_ctl->num_pages);
a67509c3 346 io_ctl->page = io_ctl->pages[io_ctl->index++];
2b108268 347 io_ctl->cur = page_address(io_ctl->page);
a67509c3 348 io_ctl->orig = io_ctl->cur;
09cbfeaf 349 io_ctl->size = PAGE_SIZE;
a67509c3 350 if (clear)
619a9742 351 clear_page(io_ctl->cur);
a67509c3
JB
352}
353
4c6d1d85 354static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
a67509c3
JB
355{
356 int i;
357
358 io_ctl_unmap_page(io_ctl);
359
360 for (i = 0; i < io_ctl->num_pages; i++) {
a1ee5a45
LZ
361 if (io_ctl->pages[i]) {
362 ClearPageChecked(io_ctl->pages[i]);
363 unlock_page(io_ctl->pages[i]);
09cbfeaf 364 put_page(io_ctl->pages[i]);
a1ee5a45 365 }
a67509c3
JB
366 }
367}
368
7a195f6d 369static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate)
a67509c3
JB
370{
371 struct page *page;
831fa14f 372 struct inode *inode = io_ctl->inode;
a67509c3
JB
373 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
374 int i;
375
376 for (i = 0; i < io_ctl->num_pages; i++) {
377 page = find_or_create_page(inode->i_mapping, i, mask);
378 if (!page) {
379 io_ctl_drop_pages(io_ctl);
380 return -ENOMEM;
381 }
382 io_ctl->pages[i] = page;
383 if (uptodate && !PageUptodate(page)) {
384 btrfs_readpage(NULL, page);
385 lock_page(page);
3797136b
JB
386 if (page->mapping != inode->i_mapping) {
387 btrfs_err(BTRFS_I(inode)->root->fs_info,
388 "free space cache page truncated");
389 io_ctl_drop_pages(io_ctl);
390 return -EIO;
391 }
a67509c3 392 if (!PageUptodate(page)) {
efe120a0
FH
393 btrfs_err(BTRFS_I(inode)->root->fs_info,
394 "error reading free space cache");
a67509c3
JB
395 io_ctl_drop_pages(io_ctl);
396 return -EIO;
397 }
398 }
399 }
400
f7d61dcd
JB
401 for (i = 0; i < io_ctl->num_pages; i++) {
402 clear_page_dirty_for_io(io_ctl->pages[i]);
403 set_page_extent_mapped(io_ctl->pages[i]);
404 }
405
a67509c3
JB
406 return 0;
407}
408
4c6d1d85 409static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
a67509c3 410{
a67509c3
JB
411 io_ctl_map_page(io_ctl, 1);
412
413 /*
5b0e95bf
JB
414 * Skip the csum areas. If we don't check crcs then we just have a
415 * 64bit chunk at the front of the first page.
a67509c3 416 */
7dbdb443
NB
417 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
418 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
a67509c3 419
6994ca36 420 put_unaligned_le64(generation, io_ctl->cur);
a67509c3 421 io_ctl->cur += sizeof(u64);
a67509c3
JB
422}
423
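/*
 * Sketch of the first-page layout produced above (illustrative, for a cache
 * file of N pages):
 *
 *   page 0: [u32 crc of page 0] ... [u32 crc of page N-1] [u64 generation]
 *           [free space entries ...]
 *   page 1..N-1: [more entries and/or full-page bitmaps]
 *
 * The crc slots at the front of page 0 are filled in later by
 * io_ctl_set_crc().
 */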
4c6d1d85 424static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
a67509c3 425{
6994ca36 426 u64 cache_gen;
a67509c3 427
5b0e95bf
JB
428 /*
429 * Skip the crc area. If we don't check crcs then we just have a 64bit
430 * chunk at the front of the first page.
431 */
7dbdb443
NB
432 io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
433 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
a67509c3 434
6994ca36
DS
435 cache_gen = get_unaligned_le64(io_ctl->cur);
436 if (cache_gen != generation) {
f15376df 437 btrfs_err_rl(io_ctl->fs_info,
94647322 438 "space cache generation (%llu) does not match inode (%llu)",
6994ca36 439 cache_gen, generation);
a67509c3
JB
440 io_ctl_unmap_page(io_ctl);
441 return -EIO;
442 }
443 io_ctl->cur += sizeof(u64);
5b0e95bf
JB
444 return 0;
445}
446
4c6d1d85 447static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
5b0e95bf
JB
448{
449 u32 *tmp;
450 u32 crc = ~(u32)0;
451 unsigned offset = 0;
452
5b0e95bf 453 if (index == 0)
cb54f257 454 offset = sizeof(u32) * io_ctl->num_pages;
5b0e95bf 455
4bb3c2e2
JT
456 crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
457 btrfs_crc32c_final(crc, (u8 *)&crc);
5b0e95bf 458 io_ctl_unmap_page(io_ctl);
2b108268 459 tmp = page_address(io_ctl->pages[0]);
5b0e95bf
JB
460 tmp += index;
461 *tmp = crc;
5b0e95bf
JB
462}
463
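/*
 * Note that for index 0 the checksum above covers PAGE_SIZE - offset bytes,
 * i.e. it deliberately skips the crc slots at the front of page 0 so that
 * writing the checksums afterwards does not invalidate them.
 */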
4c6d1d85 464static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
5b0e95bf
JB
465{
466 u32 *tmp, val;
467 u32 crc = ~(u32)0;
468 unsigned offset = 0;
469
5b0e95bf
JB
470 if (index == 0)
471 offset = sizeof(u32) * io_ctl->num_pages;
472
2b108268 473 tmp = page_address(io_ctl->pages[0]);
5b0e95bf
JB
474 tmp += index;
475 val = *tmp;
5b0e95bf
JB
476
477 io_ctl_map_page(io_ctl, 0);
4bb3c2e2
JT
478 crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
479 btrfs_crc32c_final(crc, (u8 *)&crc);
5b0e95bf 480 if (val != crc) {
f15376df 481 btrfs_err_rl(io_ctl->fs_info,
94647322 482 "csum mismatch on free space cache");
5b0e95bf
JB
483 io_ctl_unmap_page(io_ctl);
484 return -EIO;
485 }
486
a67509c3
JB
487 return 0;
488}
489
4c6d1d85 490static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes,
a67509c3
JB
491 void *bitmap)
492{
493 struct btrfs_free_space_entry *entry;
494
495 if (!io_ctl->cur)
496 return -ENOSPC;
497
498 entry = io_ctl->cur;
6994ca36
DS
499 put_unaligned_le64(offset, &entry->offset);
500 put_unaligned_le64(bytes, &entry->bytes);
a67509c3
JB
501 entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
502 BTRFS_FREE_SPACE_EXTENT;
503 io_ctl->cur += sizeof(struct btrfs_free_space_entry);
504 io_ctl->size -= sizeof(struct btrfs_free_space_entry);
505
506 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
507 return 0;
508
5b0e95bf 509 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
a67509c3
JB
510
511 /* No more pages to map */
512 if (io_ctl->index >= io_ctl->num_pages)
513 return 0;
514
515 /* map the next page */
516 io_ctl_map_page(io_ctl, 1);
517 return 0;
518}
519
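/*
 * Rough capacity example (assuming the packed 17 byte on-disk record of
 * le64 offset, le64 bytes and u8 type, and 4KiB pages): a little over 200
 * entries fit per page before io_ctl_add_entry() has to crc the current
 * page and map the next one.
 */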
4c6d1d85 520static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
a67509c3
JB
521{
522 if (!io_ctl->cur)
523 return -ENOSPC;
524
525 /*
526 * If we aren't at the start of the current page, unmap this one and
527 * map the next one if there is any left.
528 */
529 if (io_ctl->cur != io_ctl->orig) {
5b0e95bf 530 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
a67509c3
JB
531 if (io_ctl->index >= io_ctl->num_pages)
532 return -ENOSPC;
533 io_ctl_map_page(io_ctl, 0);
534 }
535
69d24804 536 copy_page(io_ctl->cur, bitmap);
5b0e95bf 537 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
a67509c3
JB
538 if (io_ctl->index < io_ctl->num_pages)
539 io_ctl_map_page(io_ctl, 0);
540 return 0;
541}
542
4c6d1d85 543static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl)
a67509c3 544{
5b0e95bf
JB
545 /*
546 * If we're not on the boundary we know we've modified the page and we
547 * need to crc the page.
548 */
549 if (io_ctl->cur != io_ctl->orig)
550 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
551 else
552 io_ctl_unmap_page(io_ctl);
a67509c3
JB
553
554 while (io_ctl->index < io_ctl->num_pages) {
555 io_ctl_map_page(io_ctl, 1);
5b0e95bf 556 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
a67509c3
JB
557 }
558}
559
4c6d1d85 560static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl,
5b0e95bf 561 struct btrfs_free_space *entry, u8 *type)
a67509c3
JB
562{
563 struct btrfs_free_space_entry *e;
2f120c05
JB
564 int ret;
565
566 if (!io_ctl->cur) {
567 ret = io_ctl_check_crc(io_ctl, io_ctl->index);
568 if (ret)
569 return ret;
570 }
a67509c3
JB
571
572 e = io_ctl->cur;
6994ca36
DS
573 entry->offset = get_unaligned_le64(&e->offset);
574 entry->bytes = get_unaligned_le64(&e->bytes);
5b0e95bf 575 *type = e->type;
a67509c3
JB
576 io_ctl->cur += sizeof(struct btrfs_free_space_entry);
577 io_ctl->size -= sizeof(struct btrfs_free_space_entry);
578
579 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
5b0e95bf 580 return 0;
a67509c3
JB
581
582 io_ctl_unmap_page(io_ctl);
583
2f120c05 584 return 0;
a67509c3
JB
585}
586
4c6d1d85 587static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
5b0e95bf 588 struct btrfs_free_space *entry)
a67509c3 589{
5b0e95bf
JB
590 int ret;
591
5b0e95bf
JB
592 ret = io_ctl_check_crc(io_ctl, io_ctl->index);
593 if (ret)
594 return ret;
595
69d24804 596 copy_page(entry->bitmap, io_ctl->cur);
a67509c3 597 io_ctl_unmap_page(io_ctl);
5b0e95bf
JB
598
599 return 0;
a67509c3
JB
600}
601
48a3b636
ES
602static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
603 struct btrfs_free_space_ctl *ctl,
604 struct btrfs_path *path, u64 offset)
9d66e233 605{
3ffbd68c 606 struct btrfs_fs_info *fs_info = root->fs_info;
9d66e233
JB
607 struct btrfs_free_space_header *header;
608 struct extent_buffer *leaf;
4c6d1d85 609 struct btrfs_io_ctl io_ctl;
9d66e233 610 struct btrfs_key key;
a67509c3 611 struct btrfs_free_space *e, *n;
b76808fc 612 LIST_HEAD(bitmaps);
9d66e233
JB
613 u64 num_entries;
614 u64 num_bitmaps;
615 u64 generation;
a67509c3 616 u8 type;
f6a39829 617 int ret = 0;
9d66e233 618
9d66e233 619 /* Nothing in the space cache, goodbye */
0414efae 620 if (!i_size_read(inode))
a67509c3 621 return 0;
9d66e233
JB
622
623 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
0414efae 624 key.offset = offset;
9d66e233
JB
625 key.type = 0;
626
627 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
0414efae 628 if (ret < 0)
a67509c3 629 return 0;
0414efae 630 else if (ret > 0) {
945d8962 631 btrfs_release_path(path);
a67509c3 632 return 0;
9d66e233
JB
633 }
634
0414efae
LZ
635 ret = -1;
636
9d66e233
JB
637 leaf = path->nodes[0];
638 header = btrfs_item_ptr(leaf, path->slots[0],
639 struct btrfs_free_space_header);
640 num_entries = btrfs_free_space_entries(leaf, header);
641 num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
642 generation = btrfs_free_space_generation(leaf, header);
945d8962 643 btrfs_release_path(path);
9d66e233 644
e570fd27 645 if (!BTRFS_I(inode)->generation) {
0b246afa 646 btrfs_info(fs_info,
913e1535 647 "the free space cache file (%llu) is invalid, skip it",
e570fd27
MX
648 offset);
649 return 0;
650 }
651
9d66e233 652 if (BTRFS_I(inode)->generation != generation) {
0b246afa
JM
653 btrfs_err(fs_info,
654 "free space inode generation (%llu) did not match free space cache generation (%llu)",
655 BTRFS_I(inode)->generation, generation);
a67509c3 656 return 0;
9d66e233
JB
657 }
658
659 if (!num_entries)
a67509c3 660 return 0;
9d66e233 661
f15376df 662 ret = io_ctl_init(&io_ctl, inode, 0);
706efc66
LZ
663 if (ret)
664 return ret;
665
1d480538 666 readahead_cache(inode);
9d66e233 667
7a195f6d 668 ret = io_ctl_prepare_pages(&io_ctl, true);
a67509c3
JB
669 if (ret)
670 goto out;
9d66e233 671
5b0e95bf
JB
672 ret = io_ctl_check_crc(&io_ctl, 0);
673 if (ret)
674 goto free_cache;
675
a67509c3
JB
676 ret = io_ctl_check_generation(&io_ctl, generation);
677 if (ret)
678 goto free_cache;
9d66e233 679
a67509c3
JB
680 while (num_entries) {
681 e = kmem_cache_zalloc(btrfs_free_space_cachep,
682 GFP_NOFS);
683 if (!e)
9d66e233 684 goto free_cache;
9d66e233 685
5b0e95bf
JB
686 ret = io_ctl_read_entry(&io_ctl, e, &type);
687 if (ret) {
688 kmem_cache_free(btrfs_free_space_cachep, e);
689 goto free_cache;
690 }
691
a67509c3
JB
692 if (!e->bytes) {
693 kmem_cache_free(btrfs_free_space_cachep, e);
694 goto free_cache;
9d66e233 695 }
a67509c3
JB
696
697 if (type == BTRFS_FREE_SPACE_EXTENT) {
698 spin_lock(&ctl->tree_lock);
699 ret = link_free_space(ctl, e);
700 spin_unlock(&ctl->tree_lock);
701 if (ret) {
0b246afa 702 btrfs_err(fs_info,
c2cf52eb 703 "Duplicate entries in free space cache, dumping");
a67509c3 704 kmem_cache_free(btrfs_free_space_cachep, e);
9d66e233
JB
705 goto free_cache;
706 }
a67509c3 707 } else {
b12d6869 708 ASSERT(num_bitmaps);
a67509c3 709 num_bitmaps--;
3acd4850
CL
710 e->bitmap = kmem_cache_zalloc(
711 btrfs_free_space_bitmap_cachep, GFP_NOFS);
a67509c3
JB
712 if (!e->bitmap) {
713 kmem_cache_free(
714 btrfs_free_space_cachep, e);
9d66e233
JB
715 goto free_cache;
716 }
a67509c3
JB
717 spin_lock(&ctl->tree_lock);
718 ret = link_free_space(ctl, e);
719 ctl->total_bitmaps++;
720 ctl->op->recalc_thresholds(ctl);
721 spin_unlock(&ctl->tree_lock);
722 if (ret) {
0b246afa 723 btrfs_err(fs_info,
c2cf52eb 724 "Duplicate entries in free space cache, dumping");
dc89e982 725 kmem_cache_free(btrfs_free_space_cachep, e);
9d66e233
JB
726 goto free_cache;
727 }
a67509c3 728 list_add_tail(&e->list, &bitmaps);
9d66e233
JB
729 }
730
a67509c3
JB
731 num_entries--;
732 }
9d66e233 733
2f120c05
JB
734 io_ctl_unmap_page(&io_ctl);
735
a67509c3
JB
736 /*
 737	 * The bitmap data was written out after all of the entries, so read
 738	 * it back here for each bitmap entry, in that same order.
739 */
740 list_for_each_entry_safe(e, n, &bitmaps, list) {
9d66e233 741 list_del_init(&e->list);
5b0e95bf
JB
742 ret = io_ctl_read_bitmap(&io_ctl, e);
743 if (ret)
744 goto free_cache;
9d66e233
JB
745 }
746
a67509c3 747 io_ctl_drop_pages(&io_ctl);
9d66e233
JB
748 ret = 1;
749out:
a67509c3 750 io_ctl_free(&io_ctl);
9d66e233 751 return ret;
9d66e233 752free_cache:
a67509c3 753 io_ctl_drop_pages(&io_ctl);
0414efae 754 __btrfs_remove_free_space_cache(ctl);
9d66e233
JB
755 goto out;
756}
757
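/*
 * Overall cache file format consumed above (sketch): the entry stream starts
 * right after the crc array and generation in page 0 and consists of packed
 * (offset, bytes, type) records; each record of type BTRFS_FREE_SPACE_BITMAP
 * is followed, after all of the records, by one full page of bitmap data, in
 * the same order the bitmap records appeared.
 */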
cd79909b
JB
758static int copy_free_space_cache(struct btrfs_block_group *block_group,
759 struct btrfs_free_space_ctl *ctl)
760{
761 struct btrfs_free_space *info;
762 struct rb_node *n;
763 int ret = 0;
764
765 while (!ret && (n = rb_first(&ctl->free_space_offset)) != NULL) {
766 info = rb_entry(n, struct btrfs_free_space, offset_index);
767 if (!info->bitmap) {
768 unlink_free_space(ctl, info);
769 ret = btrfs_add_free_space(block_group, info->offset,
770 info->bytes);
771 kmem_cache_free(btrfs_free_space_cachep, info);
772 } else {
773 u64 offset = info->offset;
774 u64 bytes = ctl->unit;
775
776 while (search_bitmap(ctl, info, &offset, &bytes,
777 false) == 0) {
778 ret = btrfs_add_free_space(block_group, offset,
779 bytes);
780 if (ret)
781 break;
782 bitmap_clear_bits(ctl, info, offset, bytes);
783 offset = info->offset;
784 bytes = ctl->unit;
785 }
786 free_bitmap(ctl, info);
787 }
788 cond_resched();
789 }
790 return ret;
791}
792
32da5386 793int load_free_space_cache(struct btrfs_block_group *block_group)
0cb59c99 794{
bb6cb1c5 795 struct btrfs_fs_info *fs_info = block_group->fs_info;
34d52cb6 796 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
cd79909b 797 struct btrfs_free_space_ctl tmp_ctl = {};
0414efae
LZ
798 struct inode *inode;
799 struct btrfs_path *path;
5b0e95bf 800 int ret = 0;
0414efae 801 bool matched;
bf38be65 802 u64 used = block_group->used;
0414efae 803
cd79909b
JB
804 /*
805 * Because we could potentially discard our loaded free space, we want
806 * to load everything into a temporary structure first, and then if it's
807 * valid copy it all into the actual free space ctl.
808 */
809 btrfs_init_free_space_ctl(block_group, &tmp_ctl);
810
0414efae
LZ
811 /*
812 * If this block group has been marked to be cleared for one reason or
813 * another then we can't trust the on disk cache, so just return.
814 */
9d66e233 815 spin_lock(&block_group->lock);
0414efae
LZ
816 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
817 spin_unlock(&block_group->lock);
818 return 0;
819 }
9d66e233 820 spin_unlock(&block_group->lock);
0414efae
LZ
821
822 path = btrfs_alloc_path();
823 if (!path)
824 return 0;
d53ba474
JB
825 path->search_commit_root = 1;
826 path->skip_locking = 1;
0414efae 827
4222ea71
FM
828 /*
829 * We must pass a path with search_commit_root set to btrfs_iget in
830 * order to avoid a deadlock when allocating extents for the tree root.
831 *
832 * When we are COWing an extent buffer from the tree root, when looking
833 * for a free extent, at extent-tree.c:find_free_extent(), we can find
834 * block group without its free space cache loaded. When we find one
835 * we must load its space cache which requires reading its free space
836 * cache's inode item from the root tree. If this inode item is located
837 * in the same leaf that we started COWing before, then we end up in
838 * deadlock on the extent buffer (trying to read lock it when we
839 * previously write locked it).
840 *
841 * It's safe to read the inode item using the commit root because
842 * block groups, once loaded, stay in memory forever (until they are
843 * removed) as well as their space caches once loaded. New block groups
844 * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
845 * we will never try to read their inode item while the fs is mounted.
846 */
7949f339 847 inode = lookup_free_space_inode(block_group, path);
0414efae
LZ
848 if (IS_ERR(inode)) {
849 btrfs_free_path(path);
850 return 0;
851 }
852
5b0e95bf
JB
853 /* We may have converted the inode and made the cache invalid. */
854 spin_lock(&block_group->lock);
855 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
856 spin_unlock(&block_group->lock);
a7e221e9 857 btrfs_free_path(path);
5b0e95bf
JB
858 goto out;
859 }
860 spin_unlock(&block_group->lock);
861
cd79909b 862 ret = __load_free_space_cache(fs_info->tree_root, inode, &tmp_ctl,
b3470b5d 863 path, block_group->start);
0414efae
LZ
864 btrfs_free_path(path);
865 if (ret <= 0)
866 goto out;
867
cd79909b
JB
868 matched = (tmp_ctl.free_space == (block_group->length - used -
869 block_group->bytes_super));
0414efae 870
cd79909b
JB
871 if (matched) {
872 ret = copy_free_space_cache(block_group, &tmp_ctl);
873 /*
874 * ret == 1 means we successfully loaded the free space cache,
875 * so we need to re-set it here.
876 */
877 if (ret == 0)
878 ret = 1;
879 } else {
880 __btrfs_remove_free_space_cache(&tmp_ctl);
5d163e0e
JM
881 btrfs_warn(fs_info,
882 "block group %llu has wrong amount of free space",
b3470b5d 883 block_group->start);
0414efae
LZ
884 ret = -1;
885 }
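	/*
	 * Example of the check above (hypothetical numbers): for a 1GiB
	 * block group with 768MiB used and no superblock mirrors
	 * (bytes_super == 0), the cache is only trusted if it accounts for
	 * exactly 256MiB of free space; otherwise it is dropped and rebuilt.
	 */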
886out:
887 if (ret < 0) {
888 /* This cache is bogus, make sure it gets cleared */
889 spin_lock(&block_group->lock);
890 block_group->disk_cache_state = BTRFS_DC_CLEAR;
891 spin_unlock(&block_group->lock);
82d5902d 892 ret = 0;
0414efae 893
5d163e0e
JM
894 btrfs_warn(fs_info,
895 "failed to load free space cache for block group %llu, rebuilding it now",
b3470b5d 896 block_group->start);
0414efae
LZ
897 }
898
66b53bae
JB
899 spin_lock(&ctl->tree_lock);
900 btrfs_discard_update_discardable(block_group);
901 spin_unlock(&ctl->tree_lock);
0414efae
LZ
902 iput(inode);
903 return ret;
9d66e233
JB
904}
905
d4452bc5 906static noinline_for_stack
4c6d1d85 907int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
d4452bc5 908 struct btrfs_free_space_ctl *ctl,
32da5386 909 struct btrfs_block_group *block_group,
d4452bc5
CM
910 int *entries, int *bitmaps,
911 struct list_head *bitmap_list)
0cb59c99 912{
c09544e0 913 int ret;
d4452bc5 914 struct btrfs_free_cluster *cluster = NULL;
1bbc621e 915 struct btrfs_free_cluster *cluster_locked = NULL;
d4452bc5 916 struct rb_node *node = rb_first(&ctl->free_space_offset);
55507ce3 917 struct btrfs_trim_range *trim_entry;
be1a12a0 918
43be2146 919 /* Get the cluster for this block_group if it exists */
d4452bc5 920 if (block_group && !list_empty(&block_group->cluster_list)) {
43be2146
JB
921 cluster = list_entry(block_group->cluster_list.next,
922 struct btrfs_free_cluster,
923 block_group_list);
d4452bc5 924 }
43be2146 925
f75b130e 926 if (!node && cluster) {
1bbc621e
CM
927 cluster_locked = cluster;
928 spin_lock(&cluster_locked->lock);
f75b130e
JB
929 node = rb_first(&cluster->root);
930 cluster = NULL;
931 }
932
a67509c3
JB
933 /* Write out the extent entries */
934 while (node) {
935 struct btrfs_free_space *e;
0cb59c99 936
a67509c3 937 e = rb_entry(node, struct btrfs_free_space, offset_index);
d4452bc5 938 *entries += 1;
0cb59c99 939
d4452bc5 940 ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes,
a67509c3
JB
941 e->bitmap);
942 if (ret)
d4452bc5 943 goto fail;
2f356126 944
a67509c3 945 if (e->bitmap) {
d4452bc5
CM
946 list_add_tail(&e->list, bitmap_list);
947 *bitmaps += 1;
2f356126 948 }
a67509c3
JB
949 node = rb_next(node);
950 if (!node && cluster) {
951 node = rb_first(&cluster->root);
1bbc621e
CM
952 cluster_locked = cluster;
953 spin_lock(&cluster_locked->lock);
a67509c3 954 cluster = NULL;
43be2146 955 }
a67509c3 956 }
1bbc621e
CM
957 if (cluster_locked) {
958 spin_unlock(&cluster_locked->lock);
959 cluster_locked = NULL;
960 }
55507ce3
FM
961
962 /*
963 * Make sure we don't miss any range that was removed from our rbtree
964 * because trimming is running. Otherwise after a umount+mount (or crash
965 * after committing the transaction) we would leak free space and get
966 * an inconsistent free space cache report from fsck.
967 */
968 list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) {
969 ret = io_ctl_add_entry(io_ctl, trim_entry->start,
970 trim_entry->bytes, NULL);
971 if (ret)
972 goto fail;
973 *entries += 1;
974 }
975
d4452bc5
CM
976 return 0;
977fail:
1bbc621e
CM
978 if (cluster_locked)
979 spin_unlock(&cluster_locked->lock);
d4452bc5
CM
980 return -ENOSPC;
981}
982
983static noinline_for_stack int
984update_cache_item(struct btrfs_trans_handle *trans,
985 struct btrfs_root *root,
986 struct inode *inode,
987 struct btrfs_path *path, u64 offset,
988 int entries, int bitmaps)
989{
990 struct btrfs_key key;
991 struct btrfs_free_space_header *header;
992 struct extent_buffer *leaf;
993 int ret;
994
995 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
996 key.offset = offset;
997 key.type = 0;
998
999 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1000 if (ret < 0) {
1001 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
e182163d 1002 EXTENT_DELALLOC, 0, 0, NULL);
d4452bc5
CM
1003 goto fail;
1004 }
1005 leaf = path->nodes[0];
1006 if (ret > 0) {
1007 struct btrfs_key found_key;
1008 ASSERT(path->slots[0]);
1009 path->slots[0]--;
1010 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1011 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
1012 found_key.offset != offset) {
1013 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
e182163d
OS
1014 inode->i_size - 1, EXTENT_DELALLOC, 0,
1015 0, NULL);
d4452bc5
CM
1016 btrfs_release_path(path);
1017 goto fail;
1018 }
1019 }
1020
1021 BTRFS_I(inode)->generation = trans->transid;
1022 header = btrfs_item_ptr(leaf, path->slots[0],
1023 struct btrfs_free_space_header);
1024 btrfs_set_free_space_entries(leaf, header, entries);
1025 btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
1026 btrfs_set_free_space_generation(leaf, header, trans->transid);
1027 btrfs_mark_buffer_dirty(leaf);
1028 btrfs_release_path(path);
1029
1030 return 0;
1031
1032fail:
1033 return -1;
1034}
1035
6701bdb3 1036static noinline_for_stack int write_pinned_extent_entries(
6b45f641 1037 struct btrfs_trans_handle *trans,
32da5386 1038 struct btrfs_block_group *block_group,
4c6d1d85 1039 struct btrfs_io_ctl *io_ctl,
5349d6c3 1040 int *entries)
d4452bc5
CM
1041{
1042 u64 start, extent_start, extent_end, len;
d4452bc5
CM
1043 struct extent_io_tree *unpin = NULL;
1044 int ret;
43be2146 1045
5349d6c3
MX
1046 if (!block_group)
1047 return 0;
1048
a67509c3
JB
1049 /*
1050 * We want to add any pinned extents to our free space cache
1051 * so we don't leak the space
d4452bc5 1052 *
db804f23
LZ
1053 * We shouldn't have switched the pinned extents yet so this is the
1054 * right one
1055 */
fe119a6e 1056 unpin = &trans->transaction->pinned_extents;
db804f23 1057
b3470b5d 1058 start = block_group->start;
db804f23 1059
b3470b5d 1060 while (start < block_group->start + block_group->length) {
db804f23
LZ
1061 ret = find_first_extent_bit(unpin, start,
1062 &extent_start, &extent_end,
e6138876 1063 EXTENT_DIRTY, NULL);
5349d6c3
MX
1064 if (ret)
1065 return 0;
0cb59c99 1066
a67509c3 1067 /* This pinned extent is out of our range */
b3470b5d 1068 if (extent_start >= block_group->start + block_group->length)
5349d6c3 1069 return 0;
2f356126 1070
db804f23 1071 extent_start = max(extent_start, start);
b3470b5d
DS
1072 extent_end = min(block_group->start + block_group->length,
1073 extent_end + 1);
db804f23 1074 len = extent_end - extent_start;
0cb59c99 1075
d4452bc5
CM
1076 *entries += 1;
1077 ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL);
a67509c3 1078 if (ret)
5349d6c3 1079 return -ENOSPC;
0cb59c99 1080
db804f23 1081 start = extent_end;
a67509c3 1082 }
0cb59c99 1083
5349d6c3
MX
1084 return 0;
1085}
1086
1087static noinline_for_stack int
4c6d1d85 1088write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list)
5349d6c3 1089{
7ae1681e 1090 struct btrfs_free_space *entry, *next;
5349d6c3
MX
1091 int ret;
1092
0cb59c99 1093 /* Write out the bitmaps */
7ae1681e 1094 list_for_each_entry_safe(entry, next, bitmap_list, list) {
d4452bc5 1095 ret = io_ctl_add_bitmap(io_ctl, entry->bitmap);
a67509c3 1096 if (ret)
5349d6c3 1097 return -ENOSPC;
0cb59c99 1098 list_del_init(&entry->list);
be1a12a0
JB
1099 }
1100
5349d6c3
MX
1101 return 0;
1102}
0cb59c99 1103
5349d6c3
MX
1104static int flush_dirty_cache(struct inode *inode)
1105{
1106 int ret;
be1a12a0 1107
0ef8b726 1108 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
5349d6c3 1109 if (ret)
0ef8b726 1110 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
e182163d 1111 EXTENT_DELALLOC, 0, 0, NULL);
0cb59c99 1112
5349d6c3 1113 return ret;
d4452bc5
CM
1114}
1115
1116static void noinline_for_stack
a3bdccc4 1117cleanup_bitmap_list(struct list_head *bitmap_list)
d4452bc5 1118{
7ae1681e 1119 struct btrfs_free_space *entry, *next;
5349d6c3 1120
7ae1681e 1121 list_for_each_entry_safe(entry, next, bitmap_list, list)
d4452bc5 1122 list_del_init(&entry->list);
a3bdccc4
CM
1123}
1124
1125static void noinline_for_stack
1126cleanup_write_cache_enospc(struct inode *inode,
1127 struct btrfs_io_ctl *io_ctl,
7bf1a159 1128 struct extent_state **cached_state)
a3bdccc4 1129{
d4452bc5
CM
1130 io_ctl_drop_pages(io_ctl);
1131 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
e43bbe5e 1132 i_size_read(inode) - 1, cached_state);
d4452bc5 1133}
549b4fdb 1134
afdb5718
JM
1135static int __btrfs_wait_cache_io(struct btrfs_root *root,
1136 struct btrfs_trans_handle *trans,
32da5386 1137 struct btrfs_block_group *block_group,
afdb5718
JM
1138 struct btrfs_io_ctl *io_ctl,
1139 struct btrfs_path *path, u64 offset)
c9dc4c65
CM
1140{
1141 int ret;
1142 struct inode *inode = io_ctl->inode;
1143
1bbc621e
CM
1144 if (!inode)
1145 return 0;
1146
c9dc4c65
CM
1147 /* Flush the dirty pages in the cache file. */
1148 ret = flush_dirty_cache(inode);
1149 if (ret)
1150 goto out;
1151
1152 /* Update the cache item to tell everyone this cache file is valid. */
1153 ret = update_cache_item(trans, root, inode, path, offset,
1154 io_ctl->entries, io_ctl->bitmaps);
1155out:
c9dc4c65
CM
1156 if (ret) {
1157 invalidate_inode_pages2(inode->i_mapping);
1158 BTRFS_I(inode)->generation = 0;
bbcd1f4d
FM
1159 if (block_group)
1160 btrfs_debug(root->fs_info,
2e69a7a6
FM
1161 "failed to write free space cache for block group %llu error %d",
1162 block_group->start, ret);
c9dc4c65 1163 }
9a56fcd1 1164 btrfs_update_inode(trans, root, BTRFS_I(inode));
c9dc4c65
CM
1165
1166 if (block_group) {
1bbc621e
CM
1167 /* the dirty list is protected by the dirty_bgs_lock */
1168 spin_lock(&trans->transaction->dirty_bgs_lock);
1169
1170 /* the disk_cache_state is protected by the block group lock */
c9dc4c65
CM
1171 spin_lock(&block_group->lock);
1172
1173 /*
1174 * only mark this as written if we didn't get put back on
1bbc621e
CM
1175 * the dirty list while waiting for IO. Otherwise our
1176 * cache state won't be right, and we won't get written again
c9dc4c65
CM
1177 */
1178 if (!ret && list_empty(&block_group->dirty_list))
1179 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1180 else if (ret)
1181 block_group->disk_cache_state = BTRFS_DC_ERROR;
1182
1183 spin_unlock(&block_group->lock);
1bbc621e 1184 spin_unlock(&trans->transaction->dirty_bgs_lock);
c9dc4c65
CM
1185 io_ctl->inode = NULL;
1186 iput(inode);
1187 }
1188
1189 return ret;
1190
1191}
1192
afdb5718 1193int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
32da5386 1194 struct btrfs_block_group *block_group,
afdb5718
JM
1195 struct btrfs_path *path)
1196{
1197 return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans,
1198 block_group, &block_group->io_ctl,
b3470b5d 1199 path, block_group->start);
afdb5718
JM
1200}
1201
d4452bc5
CM
1202/**
1203 * __btrfs_write_out_cache - write out cached info to an inode
1204 * @root - the root the inode belongs to
1205 * @ctl - the free space cache we are going to write out
1206 * @block_group - the block_group for this cache if it belongs to a block_group
1207 * @trans - the trans handle
d4452bc5
CM
1208 *
1209 * This function writes out a free space cache struct to disk for quick recovery
8cd1e731 1210 * on mount. This will return 0 if it was successful in writing the cache out,
b8605454 1211 * or an errno if it was not.
d4452bc5
CM
1212 */
1213static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1214 struct btrfs_free_space_ctl *ctl,
32da5386 1215 struct btrfs_block_group *block_group,
c9dc4c65 1216 struct btrfs_io_ctl *io_ctl,
0e8d931a 1217 struct btrfs_trans_handle *trans)
d4452bc5
CM
1218{
1219 struct extent_state *cached_state = NULL;
5349d6c3 1220 LIST_HEAD(bitmap_list);
d4452bc5
CM
1221 int entries = 0;
1222 int bitmaps = 0;
1223 int ret;
c9dc4c65 1224 int must_iput = 0;
d4452bc5
CM
1225
1226 if (!i_size_read(inode))
b8605454 1227 return -EIO;
d4452bc5 1228
c9dc4c65 1229 WARN_ON(io_ctl->pages);
f15376df 1230 ret = io_ctl_init(io_ctl, inode, 1);
d4452bc5 1231 if (ret)
b8605454 1232 return ret;
d4452bc5 1233
e570fd27
MX
1234 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
1235 down_write(&block_group->data_rwsem);
1236 spin_lock(&block_group->lock);
1237 if (block_group->delalloc_bytes) {
1238 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1239 spin_unlock(&block_group->lock);
1240 up_write(&block_group->data_rwsem);
1241 BTRFS_I(inode)->generation = 0;
1242 ret = 0;
c9dc4c65 1243 must_iput = 1;
e570fd27
MX
1244 goto out;
1245 }
1246 spin_unlock(&block_group->lock);
1247 }
1248
d4452bc5 1249 /* Lock all pages first so we can lock the extent safely. */
7a195f6d 1250 ret = io_ctl_prepare_pages(io_ctl, false);
b8605454 1251 if (ret)
b77000ed 1252 goto out_unlock;
d4452bc5
CM
1253
1254 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
ff13db41 1255 &cached_state);
d4452bc5 1256
c9dc4c65 1257 io_ctl_set_generation(io_ctl, trans->transid);
d4452bc5 1258
55507ce3 1259 mutex_lock(&ctl->cache_writeout_mutex);
5349d6c3 1260 /* Write out the extent entries in the free space cache */
1bbc621e 1261 spin_lock(&ctl->tree_lock);
c9dc4c65 1262 ret = write_cache_extent_entries(io_ctl, ctl,
d4452bc5
CM
1263 block_group, &entries, &bitmaps,
1264 &bitmap_list);
a3bdccc4
CM
1265 if (ret)
1266 goto out_nospc_locked;
d4452bc5 1267
5349d6c3
MX
1268 /*
1269 * Some spaces that are freed in the current transaction are pinned,
1270 * they will be added into free space cache after the transaction is
1271 * committed, we shouldn't lose them.
1bbc621e
CM
1272 *
1273 * If this changes while we are working we'll get added back to
1274 * the dirty list and redo it. No locking needed
5349d6c3 1275 */
6b45f641 1276 ret = write_pinned_extent_entries(trans, block_group, io_ctl, &entries);
a3bdccc4
CM
1277 if (ret)
1278 goto out_nospc_locked;
5349d6c3 1279
55507ce3
FM
1280 /*
1281 * At last, we write out all the bitmaps and keep cache_writeout_mutex
1282 * locked while doing it because a concurrent trim can be manipulating
1283 * or freeing the bitmap.
1284 */
c9dc4c65 1285 ret = write_bitmap_entries(io_ctl, &bitmap_list);
1bbc621e 1286 spin_unlock(&ctl->tree_lock);
55507ce3 1287 mutex_unlock(&ctl->cache_writeout_mutex);
5349d6c3
MX
1288 if (ret)
1289 goto out_nospc;
1290
1291 /* Zero out the rest of the pages just to make sure */
c9dc4c65 1292 io_ctl_zero_remaining_pages(io_ctl);
d4452bc5 1293
5349d6c3 1294 /* Everything is written out, now we dirty the pages in the file. */
088545f6
NB
1295 ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages,
1296 io_ctl->num_pages, 0, i_size_read(inode),
aa8c1a41 1297 &cached_state, false);
5349d6c3 1298 if (ret)
d4452bc5 1299 goto out_nospc;
5349d6c3 1300
e570fd27
MX
1301 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1302 up_write(&block_group->data_rwsem);
5349d6c3
MX
1303 /*
1304 * Release the pages and unlock the extent, we will flush
1305 * them out later
1306 */
c9dc4c65 1307 io_ctl_drop_pages(io_ctl);
bbc37d6e 1308 io_ctl_free(io_ctl);
5349d6c3
MX
1309
1310 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
e43bbe5e 1311 i_size_read(inode) - 1, &cached_state);
5349d6c3 1312
c9dc4c65
CM
1313 /*
 1314	 * at this point the pages are under IO and we're happy.
260db43c 1315 * The caller is responsible for waiting on them and updating
c9dc4c65
CM
1316 * the cache and the inode
1317 */
1318 io_ctl->entries = entries;
1319 io_ctl->bitmaps = bitmaps;
1320
1321 ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
5349d6c3 1322 if (ret)
d4452bc5
CM
1323 goto out;
1324
c9dc4c65
CM
1325 return 0;
1326
a3bdccc4
CM
1327out_nospc_locked:
1328 cleanup_bitmap_list(&bitmap_list);
1329 spin_unlock(&ctl->tree_lock);
1330 mutex_unlock(&ctl->cache_writeout_mutex);
1331
a67509c3 1332out_nospc:
7bf1a159 1333 cleanup_write_cache_enospc(inode, io_ctl, &cached_state);
e570fd27 1334
b77000ed 1335out_unlock:
e570fd27
MX
1336 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1337 up_write(&block_group->data_rwsem);
1338
fd8efa81
JT
1339out:
1340 io_ctl->inode = NULL;
1341 io_ctl_free(io_ctl);
1342 if (ret) {
1343 invalidate_inode_pages2(inode->i_mapping);
1344 BTRFS_I(inode)->generation = 0;
1345 }
9a56fcd1 1346 btrfs_update_inode(trans, root, BTRFS_I(inode));
fd8efa81
JT
1347 if (must_iput)
1348 iput(inode);
1349 return ret;
0414efae
LZ
1350}
1351
fe041534 1352int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
32da5386 1353 struct btrfs_block_group *block_group,
0414efae
LZ
1354 struct btrfs_path *path)
1355{
fe041534 1356 struct btrfs_fs_info *fs_info = trans->fs_info;
0414efae
LZ
1357 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1358 struct inode *inode;
1359 int ret = 0;
1360
0414efae
LZ
1361 spin_lock(&block_group->lock);
1362 if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
1363 spin_unlock(&block_group->lock);
e570fd27
MX
1364 return 0;
1365 }
0414efae
LZ
1366 spin_unlock(&block_group->lock);
1367
7949f339 1368 inode = lookup_free_space_inode(block_group, path);
0414efae
LZ
1369 if (IS_ERR(inode))
1370 return 0;
1371
77ab86bf
JM
1372 ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
1373 block_group, &block_group->io_ctl, trans);
c09544e0 1374 if (ret) {
bbcd1f4d 1375 btrfs_debug(fs_info,
2e69a7a6
FM
1376 "failed to write free space cache for block group %llu error %d",
1377 block_group->start, ret);
c9dc4c65
CM
1378 spin_lock(&block_group->lock);
1379 block_group->disk_cache_state = BTRFS_DC_ERROR;
1380 spin_unlock(&block_group->lock);
1381
1382 block_group->io_ctl.inode = NULL;
1383 iput(inode);
0414efae
LZ
1384 }
1385
c9dc4c65
CM
1386 /*
1387 * if ret == 0 the caller is expected to call btrfs_wait_cache_io
1388 * to wait for IO and put the inode
1389 */
1390
0cb59c99
JB
1391 return ret;
1392}
1393
34d52cb6 1394static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
96303081 1395 u64 offset)
0f9dd46c 1396{
b12d6869 1397 ASSERT(offset >= bitmap_start);
96303081 1398 offset -= bitmap_start;
34d52cb6 1399 return (unsigned long)(div_u64(offset, unit));
96303081 1400}
0f9dd46c 1401
34d52cb6 1402static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
96303081 1403{
34d52cb6 1404 return (unsigned long)(div_u64(bytes, unit));
96303081 1405}
0f9dd46c 1406
34d52cb6 1407static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
96303081
JB
1408 u64 offset)
1409{
1410 u64 bitmap_start;
0ef6447a 1411 u64 bytes_per_bitmap;
0f9dd46c 1412
34d52cb6
LZ
1413 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
1414 bitmap_start = offset - ctl->start;
0ef6447a 1415 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
96303081 1416 bitmap_start *= bytes_per_bitmap;
34d52cb6 1417 bitmap_start += ctl->start;
0f9dd46c 1418
96303081 1419 return bitmap_start;
0f9dd46c
JB
1420}
1421
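/*
 * Worked example for the helpers above (assuming 4KiB pages and a 4KiB
 * ctl->unit): BITS_PER_BITMAP is 32768, so one bitmap covers
 * 32768 * 4096 = 128MiB. With ctl->start at 1GiB, an offset of
 * 1GiB + 200MiB maps to the bitmap window starting at 1GiB + 128MiB, and
 * offset_to_bit() within that bitmap is 72MiB / 4KiB = 18432.
 */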
96303081
JB
1422static int tree_insert_offset(struct rb_root *root, u64 offset,
1423 struct rb_node *node, int bitmap)
0f9dd46c
JB
1424{
1425 struct rb_node **p = &root->rb_node;
1426 struct rb_node *parent = NULL;
1427 struct btrfs_free_space *info;
1428
1429 while (*p) {
1430 parent = *p;
96303081 1431 info = rb_entry(parent, struct btrfs_free_space, offset_index);
0f9dd46c 1432
96303081 1433 if (offset < info->offset) {
0f9dd46c 1434 p = &(*p)->rb_left;
96303081 1435 } else if (offset > info->offset) {
0f9dd46c 1436 p = &(*p)->rb_right;
96303081
JB
1437 } else {
1438 /*
1439 * we could have a bitmap entry and an extent entry
1440 * share the same offset. If this is the case, we want
1441 * the extent entry to always be found first if we do a
1442 * linear search through the tree, since we want to have
1443 * the quickest allocation time, and allocating from an
1444 * extent is faster than allocating from a bitmap. So
1445 * if we're inserting a bitmap and we find an entry at
1446 * this offset, we want to go right, or after this entry
1447 * logically. If we are inserting an extent and we've
1448 * found a bitmap, we want to go left, or before
1449 * logically.
1450 */
1451 if (bitmap) {
207dde82
JB
1452 if (info->bitmap) {
1453 WARN_ON_ONCE(1);
1454 return -EEXIST;
1455 }
96303081
JB
1456 p = &(*p)->rb_right;
1457 } else {
207dde82
JB
1458 if (!info->bitmap) {
1459 WARN_ON_ONCE(1);
1460 return -EEXIST;
1461 }
96303081
JB
1462 p = &(*p)->rb_left;
1463 }
1464 }
0f9dd46c
JB
1465 }
1466
1467 rb_link_node(node, parent, p);
1468 rb_insert_color(node, root);
1469
1470 return 0;
1471}
1472
1473/*
70cb0743
JB
1474 * searches the tree for the given offset.
1475 *
96303081
JB
1476 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 1477 * want a section that is at least @bytes in size and comes at or after the given
1478 * offset.
0f9dd46c 1479 */
96303081 1480static struct btrfs_free_space *
34d52cb6 1481tree_search_offset(struct btrfs_free_space_ctl *ctl,
96303081 1482 u64 offset, int bitmap_only, int fuzzy)
0f9dd46c 1483{
34d52cb6 1484 struct rb_node *n = ctl->free_space_offset.rb_node;
96303081
JB
1485 struct btrfs_free_space *entry, *prev = NULL;
1486
1487 /* find entry that is closest to the 'offset' */
1488 while (1) {
1489 if (!n) {
1490 entry = NULL;
1491 break;
1492 }
0f9dd46c 1493
0f9dd46c 1494 entry = rb_entry(n, struct btrfs_free_space, offset_index);
96303081 1495 prev = entry;
0f9dd46c 1496
96303081 1497 if (offset < entry->offset)
0f9dd46c 1498 n = n->rb_left;
96303081 1499 else if (offset > entry->offset)
0f9dd46c 1500 n = n->rb_right;
96303081 1501 else
0f9dd46c 1502 break;
0f9dd46c
JB
1503 }
1504
96303081
JB
1505 if (bitmap_only) {
1506 if (!entry)
1507 return NULL;
1508 if (entry->bitmap)
1509 return entry;
0f9dd46c 1510
96303081
JB
1511 /*
1512 * bitmap entry and extent entry may share same offset,
1513 * in that case, bitmap entry comes after extent entry.
1514 */
1515 n = rb_next(n);
1516 if (!n)
1517 return NULL;
1518 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1519 if (entry->offset != offset)
1520 return NULL;
0f9dd46c 1521
96303081
JB
1522 WARN_ON(!entry->bitmap);
1523 return entry;
1524 } else if (entry) {
1525 if (entry->bitmap) {
0f9dd46c 1526 /*
96303081
JB
1527 * if previous extent entry covers the offset,
1528 * we should return it instead of the bitmap entry
0f9dd46c 1529 */
de6c4115
MX
1530 n = rb_prev(&entry->offset_index);
1531 if (n) {
96303081
JB
1532 prev = rb_entry(n, struct btrfs_free_space,
1533 offset_index);
de6c4115
MX
1534 if (!prev->bitmap &&
1535 prev->offset + prev->bytes > offset)
1536 entry = prev;
0f9dd46c 1537 }
96303081
JB
1538 }
1539 return entry;
1540 }
1541
1542 if (!prev)
1543 return NULL;
1544
1545 /* find last entry before the 'offset' */
1546 entry = prev;
1547 if (entry->offset > offset) {
1548 n = rb_prev(&entry->offset_index);
1549 if (n) {
1550 entry = rb_entry(n, struct btrfs_free_space,
1551 offset_index);
b12d6869 1552 ASSERT(entry->offset <= offset);
0f9dd46c 1553 } else {
96303081
JB
1554 if (fuzzy)
1555 return entry;
1556 else
1557 return NULL;
0f9dd46c
JB
1558 }
1559 }
1560
96303081 1561 if (entry->bitmap) {
de6c4115
MX
1562 n = rb_prev(&entry->offset_index);
1563 if (n) {
96303081
JB
1564 prev = rb_entry(n, struct btrfs_free_space,
1565 offset_index);
de6c4115
MX
1566 if (!prev->bitmap &&
1567 prev->offset + prev->bytes > offset)
1568 return prev;
96303081 1569 }
34d52cb6 1570 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
96303081
JB
1571 return entry;
1572 } else if (entry->offset + entry->bytes > offset)
1573 return entry;
1574
1575 if (!fuzzy)
1576 return NULL;
1577
1578 while (1) {
1579 if (entry->bitmap) {
1580 if (entry->offset + BITS_PER_BITMAP *
34d52cb6 1581 ctl->unit > offset)
96303081
JB
1582 break;
1583 } else {
1584 if (entry->offset + entry->bytes > offset)
1585 break;
1586 }
1587
1588 n = rb_next(&entry->offset_index);
1589 if (!n)
1590 return NULL;
1591 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1592 }
1593 return entry;
0f9dd46c
JB
1594}
1595
f333adb5 1596static inline void
34d52cb6 1597__unlink_free_space(struct btrfs_free_space_ctl *ctl,
f333adb5 1598 struct btrfs_free_space *info)
0f9dd46c 1599{
34d52cb6
LZ
1600 rb_erase(&info->offset_index, &ctl->free_space_offset);
1601 ctl->free_extents--;
dfb79ddb 1602
5dc7c10b 1603 if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
dfb79ddb 1604 ctl->discardable_extents[BTRFS_STAT_CURR]--;
5dc7c10b
DZ
1605 ctl->discardable_bytes[BTRFS_STAT_CURR] -= info->bytes;
1606 }
f333adb5
LZ
1607}
1608
34d52cb6 1609static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
f333adb5
LZ
1610 struct btrfs_free_space *info)
1611{
34d52cb6
LZ
1612 __unlink_free_space(ctl, info);
1613 ctl->free_space -= info->bytes;
0f9dd46c
JB
1614}
1615
34d52cb6 1616static int link_free_space(struct btrfs_free_space_ctl *ctl,
0f9dd46c
JB
1617 struct btrfs_free_space *info)
1618{
1619 int ret = 0;
1620
b12d6869 1621 ASSERT(info->bytes || info->bitmap);
34d52cb6 1622 ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
96303081 1623 &info->offset_index, (info->bitmap != NULL));
0f9dd46c
JB
1624 if (ret)
1625 return ret;
1626
5dc7c10b 1627 if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
dfb79ddb 1628 ctl->discardable_extents[BTRFS_STAT_CURR]++;
5dc7c10b
DZ
1629 ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
1630 }
dfb79ddb 1631
34d52cb6
LZ
1632 ctl->free_space += info->bytes;
1633 ctl->free_extents++;
96303081
JB
1634 return ret;
1635}
1636
34d52cb6 1637static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
96303081 1638{
32da5386 1639 struct btrfs_block_group *block_group = ctl->private;
25891f79
JB
1640 u64 max_bytes;
1641 u64 bitmap_bytes;
1642 u64 extent_bytes;
b3470b5d 1643 u64 size = block_group->length;
0ef6447a
FX
1644 u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
1645 u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
34d52cb6 1646
0ef6447a 1647 max_bitmaps = max_t(u64, max_bitmaps, 1);
dde5740f 1648
b12d6869 1649 ASSERT(ctl->total_bitmaps <= max_bitmaps);
96303081
JB
1650
1651 /*
5d90c5c7
DZ
1652 * We are trying to keep the total amount of memory used per 1GiB of
1653 * space to be MAX_CACHE_BYTES_PER_GIG. However, with a reclamation
1654 * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of
1655 * bitmaps, we may end up using more memory than this.
96303081 1656 */
ee22184b 1657 if (size < SZ_1G)
8eb2d829
LZ
1658 max_bytes = MAX_CACHE_BYTES_PER_GIG;
1659 else
ee22184b 1660 max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G);
96303081 1661
5d90c5c7 1662 bitmap_bytes = ctl->total_bitmaps * ctl->unit;
96303081 1663
25891f79 1664 /*
f8c269d7 1665 * we want the extent entry threshold to always be at most 1/2 the max
25891f79
JB
1666 * bytes we can have, or whatever is less than that.
1667 */
1668 extent_bytes = max_bytes - bitmap_bytes;
f8c269d7 1669 extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1);
96303081 1670
34d52cb6 1671 ctl->extents_thresh =
f8c269d7 1672 div_u64(extent_bytes, sizeof(struct btrfs_free_space));
96303081
JB
1673}
1674
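/*
 * Rough example of the result (assumptions, not exact struct sizes): for a
 * block group under 1GiB with no bitmaps yet, max_bytes is 64KiB,
 * extent_bytes is capped at 32KiB, and with struct btrfs_free_space in the
 * neighbourhood of 64-80 bytes on 64-bit this allows a few hundred extent
 * entries before free space starts being folded into bitmaps.
 */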
bb3ac5a4
MX
1675static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1676 struct btrfs_free_space *info,
1677 u64 offset, u64 bytes)
96303081 1678{
dfb79ddb
DZ
1679 unsigned long start, count, end;
1680 int extent_delta = -1;
96303081 1681
34d52cb6
LZ
1682 start = offset_to_bit(info->offset, ctl->unit, offset);
1683 count = bytes_to_bits(bytes, ctl->unit);
dfb79ddb
DZ
1684 end = start + count;
1685 ASSERT(end <= BITS_PER_BITMAP);
96303081 1686
f38b6e75 1687 bitmap_clear(info->bitmap, start, count);
96303081
JB
1688
1689 info->bytes -= bytes;
553cceb4
JB
1690 if (info->max_extent_size > ctl->unit)
1691 info->max_extent_size = 0;
dfb79ddb
DZ
1692
1693 if (start && test_bit(start - 1, info->bitmap))
1694 extent_delta++;
1695
1696 if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
1697 extent_delta++;
1698
1699 info->bitmap_extents += extent_delta;
5dc7c10b 1700 if (!btrfs_free_space_trimmed(info)) {
dfb79ddb 1701 ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
5dc7c10b
DZ
1702 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
1703 }
bb3ac5a4
MX
1704}
1705
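/*
 * Example of the extent_delta accounting above (illustrative bit patterns):
 * clearing the middle of a run, 111111 -> 110011, leaves both neighbouring
 * bits set, so extent_delta ends up at +1 (one run became two); clearing an
 * isolated run, 0110 -> 0000, leaves neither neighbour set, so it stays at
 * -1 (one run disappeared).
 */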
1706static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1707 struct btrfs_free_space *info, u64 offset,
1708 u64 bytes)
1709{
1710 __bitmap_clear_bits(ctl, info, offset, bytes);
34d52cb6 1711 ctl->free_space -= bytes;
96303081
JB
1712}
1713
34d52cb6 1714static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
817d52f8
JB
1715 struct btrfs_free_space *info, u64 offset,
1716 u64 bytes)
96303081 1717{
dfb79ddb
DZ
1718 unsigned long start, count, end;
1719 int extent_delta = 1;
96303081 1720
34d52cb6
LZ
1721 start = offset_to_bit(info->offset, ctl->unit, offset);
1722 count = bytes_to_bits(bytes, ctl->unit);
dfb79ddb
DZ
1723 end = start + count;
1724 ASSERT(end <= BITS_PER_BITMAP);
96303081 1725
f38b6e75 1726 bitmap_set(info->bitmap, start, count);
96303081
JB
1727
1728 info->bytes += bytes;
34d52cb6 1729 ctl->free_space += bytes;
dfb79ddb
DZ
1730
1731 if (start && test_bit(start - 1, info->bitmap))
1732 extent_delta--;
1733
1734 if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
1735 extent_delta--;
1736
1737 info->bitmap_extents += extent_delta;
5dc7c10b 1738 if (!btrfs_free_space_trimmed(info)) {
dfb79ddb 1739 ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
5dc7c10b
DZ
1740 ctl->discardable_bytes[BTRFS_STAT_CURR] += bytes;
1741 }
96303081
JB
1742}
1743
a4820398
MX
1744/*
1745 * If we cannot find a suitable extent, we will use bytes to record
1746 * the size of the max extent.
1747 */
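/*
 * On success, 0 is returned and *offset/*bytes describe the free run that was
 * found. On failure, -1 is returned, *bytes is set to the largest run seen,
 * and that value is also cached in bitmap_info->max_extent_size. With
 * @for_alloc set, the scan is skipped when the cached max_extent_size already
 * rules out the request, and a request for a single bit takes the first set
 * bit found.
 */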
34d52cb6 1748static int search_bitmap(struct btrfs_free_space_ctl *ctl,
96303081 1749 struct btrfs_free_space *bitmap_info, u64 *offset,
0584f718 1750 u64 *bytes, bool for_alloc)
96303081
JB
1751{
1752 unsigned long found_bits = 0;
a4820398 1753 unsigned long max_bits = 0;
96303081
JB
1754 unsigned long bits, i;
1755 unsigned long next_zero;
a4820398 1756 unsigned long extent_bits;
96303081 1757
cef40483
JB
1758 /*
1759 * Skip searching the bitmap if we don't have a contiguous section that
1760 * is large enough for this allocation.
1761 */
0584f718
JB
1762 if (for_alloc &&
1763 bitmap_info->max_extent_size &&
cef40483
JB
1764 bitmap_info->max_extent_size < *bytes) {
1765 *bytes = bitmap_info->max_extent_size;
1766 return -1;
1767 }
1768
34d52cb6 1769 i = offset_to_bit(bitmap_info->offset, ctl->unit,
96303081 1770 max_t(u64, *offset, bitmap_info->offset));
34d52cb6 1771 bits = bytes_to_bits(*bytes, ctl->unit);
96303081 1772
ebb3dad4 1773 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
0584f718
JB
1774 if (for_alloc && bits == 1) {
1775 found_bits = 1;
1776 break;
1777 }
96303081
JB
1778 next_zero = find_next_zero_bit(bitmap_info->bitmap,
1779 BITS_PER_BITMAP, i);
a4820398
MX
1780 extent_bits = next_zero - i;
1781 if (extent_bits >= bits) {
1782 found_bits = extent_bits;
96303081 1783 break;
a4820398
MX
1784 } else if (extent_bits > max_bits) {
1785 max_bits = extent_bits;
96303081
JB
1786 }
1787 i = next_zero;
1788 }
1789
1790 if (found_bits) {
34d52cb6
LZ
1791 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1792 *bytes = (u64)(found_bits) * ctl->unit;
96303081
JB
1793 return 0;
1794 }
1795
a4820398 1796 *bytes = (u64)(max_bits) * ctl->unit;
cef40483 1797 bitmap_info->max_extent_size = *bytes;
96303081
JB
1798 return -1;
1799}
1800
ad22cf6e
JB
1801static inline u64 get_max_extent_size(struct btrfs_free_space *entry)
1802{
1803 if (entry->bitmap)
1804 return entry->max_extent_size;
1805 return entry->bytes;
1806}
1807
a4820398 1808/* Find an entry of at least *bytes (honoring @align); too-small entries update *max_extent_size */
34d52cb6 1809static struct btrfs_free_space *
53b381b3 1810find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
a4820398 1811 unsigned long align, u64 *max_extent_size)
96303081
JB
1812{
1813 struct btrfs_free_space *entry;
1814 struct rb_node *node;
53b381b3
DW
1815 u64 tmp;
1816 u64 align_off;
96303081
JB
1817 int ret;
1818
34d52cb6 1819 if (!ctl->free_space_offset.rb_node)
a4820398 1820 goto out;
96303081 1821
34d52cb6 1822 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
96303081 1823 if (!entry)
a4820398 1824 goto out;
96303081
JB
1825
1826 for (node = &entry->offset_index; node; node = rb_next(node)) {
1827 entry = rb_entry(node, struct btrfs_free_space, offset_index);
a4820398 1828 if (entry->bytes < *bytes) {
ad22cf6e
JB
1829 *max_extent_size = max(get_max_extent_size(entry),
1830 *max_extent_size);
96303081 1831 continue;
a4820398 1832 }
96303081 1833
53b381b3
DW
1834 /* make sure the space returned is big enough
1835 * to match our requested alignment
1836 */
1837 if (*bytes >= align) {
a4820398 1838 tmp = entry->offset - ctl->start + align - 1;
47c5713f 1839 tmp = div64_u64(tmp, align);
53b381b3
DW
1840 tmp = tmp * align + ctl->start;
1841 align_off = tmp - entry->offset;
1842 } else {
1843 align_off = 0;
1844 tmp = entry->offset;
1845 }
1846
a4820398 1847 if (entry->bytes < *bytes + align_off) {
ad22cf6e
JB
1848 *max_extent_size = max(get_max_extent_size(entry),
1849 *max_extent_size);
53b381b3 1850 continue;
a4820398 1851 }
53b381b3 1852
96303081 1853 if (entry->bitmap) {
a4820398
MX
1854 u64 size = *bytes;
1855
0584f718 1856 ret = search_bitmap(ctl, entry, &tmp, &size, true);
53b381b3
DW
1857 if (!ret) {
1858 *offset = tmp;
a4820398 1859 *bytes = size;
96303081 1860 return entry;
ad22cf6e
JB
1861 } else {
1862 *max_extent_size =
1863 max(get_max_extent_size(entry),
1864 *max_extent_size);
53b381b3 1865 }
96303081
JB
1866 continue;
1867 }
1868
53b381b3
DW
1869 *offset = tmp;
1870 *bytes = entry->bytes - align_off;
96303081
JB
1871 return entry;
1872 }
a4820398 1873out:
96303081
JB
1874 return NULL;
1875}
1876
34d52cb6 1877static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
96303081
JB
1878 struct btrfs_free_space *info, u64 offset)
1879{
34d52cb6 1880 info->offset = offset_to_bitmap(ctl, offset);
f019f426 1881 info->bytes = 0;
dfb79ddb 1882 info->bitmap_extents = 0;
f2d0f676 1883 INIT_LIST_HEAD(&info->list);
34d52cb6
LZ
1884 link_free_space(ctl, info);
1885 ctl->total_bitmaps++;
96303081 1886
34d52cb6 1887 ctl->op->recalc_thresholds(ctl);
96303081
JB
1888}
1889
34d52cb6 1890static void free_bitmap(struct btrfs_free_space_ctl *ctl,
edf6e2d1
LZ
1891 struct btrfs_free_space *bitmap_info)
1892{
27f0afc7
DZ
1893 /*
1894 * Normally when this is called, the bitmap is completely empty. However,
1895 * if we are blowing up the free space cache for one reason or another
1896 * via __btrfs_remove_free_space_cache(), then it may not be empty and
1897 * we may leave stats on the table.
1898 */
1899 if (bitmap_info->bytes && !btrfs_free_space_trimmed(bitmap_info)) {
1900 ctl->discardable_extents[BTRFS_STAT_CURR] -=
1901 bitmap_info->bitmap_extents;
1902 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bitmap_info->bytes;
1903
1904 }
34d52cb6 1905 unlink_free_space(ctl, bitmap_info);
3acd4850 1906 kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap);
dc89e982 1907 kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
34d52cb6
LZ
1908 ctl->total_bitmaps--;
1909 ctl->op->recalc_thresholds(ctl);
edf6e2d1
LZ
1910}
1911
34d52cb6 1912static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
96303081
JB
1913 struct btrfs_free_space *bitmap_info,
1914 u64 *offset, u64 *bytes)
1915{
1916 u64 end;
6606bb97
JB
1917 u64 search_start, search_bytes;
1918 int ret;
96303081
JB
1919
1920again:
34d52cb6 1921 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
96303081 1922
6606bb97 1923 /*
bdb7d303
JB
1924 * We need to search for bits in this bitmap. We could only cover some
1925 * of the extent in this bitmap thanks to how we add space, so we need
1926 * to search for as much of it as we can and clear that amount, and then
1927 * go searching for the next bit.
6606bb97
JB
1928 */
1929 search_start = *offset;
bdb7d303 1930 search_bytes = ctl->unit;
13dbc089 1931 search_bytes = min(search_bytes, end - search_start + 1);
0584f718
JB
1932 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
1933 false);
b50c6e25
JB
1934 if (ret < 0 || search_start != *offset)
1935 return -EINVAL;
6606bb97 1936
bdb7d303
JB
1937 /* We may have found more bits than what we need */
1938 search_bytes = min(search_bytes, *bytes);
1939
1940 /* Cannot clear past the end of the bitmap */
1941 search_bytes = min(search_bytes, end - search_start + 1);
1942
1943 bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
1944 *offset += search_bytes;
1945 *bytes -= search_bytes;
96303081
JB
1946
1947 if (*bytes) {
6606bb97 1948 struct rb_node *next = rb_next(&bitmap_info->offset_index);
edf6e2d1 1949 if (!bitmap_info->bytes)
34d52cb6 1950 free_bitmap(ctl, bitmap_info);
96303081 1951
6606bb97
JB
1952 /*
1953 * no entry after this bitmap, but we still have bytes to
1954 * remove, so something has gone wrong.
1955 */
1956 if (!next)
96303081
JB
1957 return -EINVAL;
1958
6606bb97
JB
1959 bitmap_info = rb_entry(next, struct btrfs_free_space,
1960 offset_index);
1961
1962 /*
1963 * if the next entry isn't a bitmap we need to return to let the
1964 * extent stuff do its work.
1965 */
96303081
JB
1966 if (!bitmap_info->bitmap)
1967 return -EAGAIN;
1968
6606bb97
JB
1969 /*
1970 * Ok the next item is a bitmap, but it may not actually hold
1971 * the information for the rest of this free space stuff, so
1972 * look for it, and if we don't find it return so we can try
1973 * everything over again.
1974 */
1975 search_start = *offset;
bdb7d303 1976 search_bytes = ctl->unit;
34d52cb6 1977 ret = search_bitmap(ctl, bitmap_info, &search_start,
0584f718 1978 &search_bytes, false);
6606bb97
JB
1979 if (ret < 0 || search_start != *offset)
1980 return -EAGAIN;
1981
96303081 1982 goto again;
edf6e2d1 1983 } else if (!bitmap_info->bytes)
34d52cb6 1984 free_bitmap(ctl, bitmap_info);
96303081
JB
1985
1986 return 0;
1987}
1988
2cdc342c
JB
1989static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1990 struct btrfs_free_space *info, u64 offset,
da080fe1 1991 u64 bytes, enum btrfs_trim_state trim_state)
2cdc342c
JB
1992{
1993 u64 bytes_to_set = 0;
1994 u64 end;
1995
da080fe1
DZ
1996 /*
1997 * This is a tradeoff to make bitmap trim state minimal. We mark the
1998 * whole bitmap untrimmed if at any point we add untrimmed regions.
1999 */
dfb79ddb 2000 if (trim_state == BTRFS_TRIM_STATE_UNTRIMMED) {
5dc7c10b 2001 if (btrfs_free_space_trimmed(info)) {
dfb79ddb
DZ
2002 ctl->discardable_extents[BTRFS_STAT_CURR] +=
2003 info->bitmap_extents;
5dc7c10b
DZ
2004 ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
2005 }
da080fe1 2006 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
dfb79ddb 2007 }
da080fe1 2008
2cdc342c
JB
2009 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
2010
2011 bytes_to_set = min(end - offset, bytes);
2012
2013 bitmap_set_bits(ctl, info, offset, bytes_to_set);
2014
cef40483
JB
2015 /*
2016 * We set some bytes, so we have no idea what the max extent size is
2017 * anymore.
2018 */
2019 info->max_extent_size = 0;
2020
2cdc342c
JB
2021 return bytes_to_set;
2022
2023}
2024
34d52cb6
LZ
2025static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
2026 struct btrfs_free_space *info)
96303081 2027{
32da5386 2028 struct btrfs_block_group *block_group = ctl->private;
0b246afa 2029 struct btrfs_fs_info *fs_info = block_group->fs_info;
d0bd4560
JB
2030 bool forced = false;
2031
2032#ifdef CONFIG_BTRFS_DEBUG
2ff7e61e 2033 if (btrfs_should_fragment_free_space(block_group))
d0bd4560
JB
2034 forced = true;
2035#endif
96303081 2036
5d90c5c7
DZ
2037 /* This is a way to reclaim large regions from the bitmaps. */
2038 if (!forced && info->bytes >= FORCE_EXTENT_THRESHOLD)
2039 return false;
2040
96303081
JB
2041 /*
2042 * If we are below the extents threshold then we can add this as an
2043 * extent, and don't have to deal with the bitmap
2044 */
d0bd4560 2045 if (!forced && ctl->free_extents < ctl->extents_thresh) {
32cb0840
JB
2046 /*
2047 * If this block group has some small extents we don't want to
2048 * use up all of our free slots in the cache with them, we want
01327610 2049 * to reserve them for larger extents, however if we have plenty
32cb0840
JB
2050 * of cache left then go ahead and add them, no sense in adding
2051 * the overhead of a bitmap if we don't have to.
2052 */
f9bb615a
DZ
2053 if (info->bytes <= fs_info->sectorsize * 8) {
2054 if (ctl->free_extents * 3 <= ctl->extents_thresh)
34d52cb6 2055 return false;
32cb0840 2056 } else {
34d52cb6 2057 return false;
32cb0840
JB
2058 }
2059 }
96303081
JB
2060
2061 /*
dde5740f
JB
2062 * The original block groups from mkfs can be really small, like 8
2063 * megabytes, so don't bother with a bitmap for those entries. However
2064 * some block groups can be smaller than what a bitmap would cover but
2065 * are still large enough that they could overflow the 32k memory limit,
2066 * so still allow those block groups to have a bitmap
2067 * entry.
96303081 2068 */
b3470b5d 2069 if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->length)
34d52cb6
LZ
2070 return false;
2071
2072 return true;
2073}
2074
20e5506b 2075static const struct btrfs_free_space_op free_space_op = {
2cdc342c
JB
2076 .recalc_thresholds = recalculate_thresholds,
2077 .use_bitmap = use_bitmap,
2078};
2079
34d52cb6
LZ
2080static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
2081 struct btrfs_free_space *info)
2082{
2083 struct btrfs_free_space *bitmap_info;
32da5386 2084 struct btrfs_block_group *block_group = NULL;
34d52cb6 2085 int added = 0;
2cdc342c 2086 u64 bytes, offset, bytes_added;
da080fe1 2087 enum btrfs_trim_state trim_state;
34d52cb6 2088 int ret;
96303081
JB
2089
2090 bytes = info->bytes;
2091 offset = info->offset;
da080fe1 2092 trim_state = info->trim_state;
96303081 2093
34d52cb6
LZ
2094 if (!ctl->op->use_bitmap(ctl, info))
2095 return 0;
2096
2cdc342c
JB
2097 if (ctl->op == &free_space_op)
2098 block_group = ctl->private;
38e87880 2099again:
2cdc342c
JB
2100 /*
2101 * Since we link bitmaps right into the cluster we need to see if we
2102 * have a cluster here, and if so and it has our bitmap, we need to add
2103 * the free space to that bitmap.
2104 */
2105 if (block_group && !list_empty(&block_group->cluster_list)) {
2106 struct btrfs_free_cluster *cluster;
2107 struct rb_node *node;
2108 struct btrfs_free_space *entry;
2109
2110 cluster = list_entry(block_group->cluster_list.next,
2111 struct btrfs_free_cluster,
2112 block_group_list);
2113 spin_lock(&cluster->lock);
2114 node = rb_first(&cluster->root);
2115 if (!node) {
2116 spin_unlock(&cluster->lock);
38e87880 2117 goto no_cluster_bitmap;
2cdc342c
JB
2118 }
2119
2120 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2121 if (!entry->bitmap) {
2122 spin_unlock(&cluster->lock);
38e87880 2123 goto no_cluster_bitmap;
2cdc342c
JB
2124 }
2125
2126 if (entry->offset == offset_to_bitmap(ctl, offset)) {
da080fe1
DZ
2127 bytes_added = add_bytes_to_bitmap(ctl, entry, offset,
2128 bytes, trim_state);
2cdc342c
JB
2129 bytes -= bytes_added;
2130 offset += bytes_added;
2131 }
2132 spin_unlock(&cluster->lock);
2133 if (!bytes) {
2134 ret = 1;
2135 goto out;
2136 }
2137 }
38e87880
CM
2138
2139no_cluster_bitmap:
34d52cb6 2140 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
96303081
JB
2141 1, 0);
2142 if (!bitmap_info) {
b12d6869 2143 ASSERT(added == 0);
96303081
JB
2144 goto new_bitmap;
2145 }
2146
da080fe1
DZ
2147 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
2148 trim_state);
2cdc342c
JB
2149 bytes -= bytes_added;
2150 offset += bytes_added;
2151 added = 0;
96303081
JB
2152
2153 if (!bytes) {
2154 ret = 1;
2155 goto out;
2156 } else
2157 goto again;
2158
2159new_bitmap:
2160 if (info && info->bitmap) {
34d52cb6 2161 add_new_bitmap(ctl, info, offset);
96303081
JB
2162 added = 1;
2163 info = NULL;
2164 goto again;
2165 } else {
34d52cb6 2166 spin_unlock(&ctl->tree_lock);
96303081
JB
2167
2168 /* no pre-allocated info, allocate a new one */
2169 if (!info) {
dc89e982
JB
2170 info = kmem_cache_zalloc(btrfs_free_space_cachep,
2171 GFP_NOFS);
96303081 2172 if (!info) {
34d52cb6 2173 spin_lock(&ctl->tree_lock);
96303081
JB
2174 ret = -ENOMEM;
2175 goto out;
2176 }
2177 }
2178
2179 /* allocate the bitmap */
3acd4850
CL
2180 info->bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep,
2181 GFP_NOFS);
da080fe1 2182 info->trim_state = BTRFS_TRIM_STATE_TRIMMED;
34d52cb6 2183 spin_lock(&ctl->tree_lock);
96303081
JB
2184 if (!info->bitmap) {
2185 ret = -ENOMEM;
2186 goto out;
2187 }
2188 goto again;
2189 }
2190
2191out:
2192 if (info) {
3acd4850
CL
2193 if (info->bitmap)
2194 kmem_cache_free(btrfs_free_space_bitmap_cachep,
2195 info->bitmap);
dc89e982 2196 kmem_cache_free(btrfs_free_space_cachep, info);
96303081 2197 }
0f9dd46c
JB
2198
2199 return ret;
2200}
2201
a7ccb255
DZ
2202/*
2203 * Free space merging rules:
2204 * 1) Merge trimmed areas together
2205 * 2) Let untrimmed areas coalesce with trimmed areas
2206 * 3) Always pull neighboring regions from bitmaps
2207 *
2208 * The above rules are for when we merge free space based on btrfs_trim_state.
2209 * Rules 2 and 3 are subtle because they are suboptimal, but are done for the
2210 * same reason: to promote larger extent regions which makes life easier for
2211 * find_free_extent(). Rule 2 enables coalescing based on the common path
2212 * of returning free space from btrfs_finish_extent_commit(). So when free
2213 * space is trimmed, it will prevent aggregating trimmed new region and
2214 * untrimmed regions in the rb_tree. Rule 3 is purely to obtain larger extents
2215 * and provide find_free_extent() with the largest extents possible hoping for
2216 * the reuse path.
2217 */
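/*
 * For example: an untrimmed extent returned by btrfs_finish_extent_commit()
 * that borders an already trimmed extent is merged into one entry, and the
 * result keeps the untrimmed state (rule 2). A trimmed extent being added
 * will not absorb an untrimmed neighbor (rule 1), and steal_from_bitmap()
 * pulls adjacent set bits out of a bitmap into the extent even when that
 * marks the result untrimmed (rule 3).
 */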
945d8962 2218static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
f333adb5 2219 struct btrfs_free_space *info, bool update_stat)
0f9dd46c 2220{
bf53d468 2221 struct btrfs_free_space *left_info = NULL;
120d66ee
LZ
2222 struct btrfs_free_space *right_info;
2223 bool merged = false;
2224 u64 offset = info->offset;
2225 u64 bytes = info->bytes;
a7ccb255 2226 const bool is_trimmed = btrfs_free_space_trimmed(info);
6226cb0a 2227
0f9dd46c
JB
2228 /*
2229 * first we want to see if there is free space adjacent to the range we
2230 * are adding, if there is remove that struct and add a new one to
2231 * cover the entire range
2232 */
34d52cb6 2233 right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
96303081
JB
2234 if (right_info && rb_prev(&right_info->offset_index))
2235 left_info = rb_entry(rb_prev(&right_info->offset_index),
2236 struct btrfs_free_space, offset_index);
bf53d468 2237 else if (!right_info)
34d52cb6 2238 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
0f9dd46c 2239
a7ccb255
DZ
2240 /* See try_merge_free_space() comment. */
2241 if (right_info && !right_info->bitmap &&
2242 (!is_trimmed || btrfs_free_space_trimmed(right_info))) {
f333adb5 2243 if (update_stat)
34d52cb6 2244 unlink_free_space(ctl, right_info);
f333adb5 2245 else
34d52cb6 2246 __unlink_free_space(ctl, right_info);
6226cb0a 2247 info->bytes += right_info->bytes;
dc89e982 2248 kmem_cache_free(btrfs_free_space_cachep, right_info);
120d66ee 2249 merged = true;
0f9dd46c
JB
2250 }
2251
a7ccb255 2252 /* See try_merge_free_space() comment. */
96303081 2253 if (left_info && !left_info->bitmap &&
a7ccb255
DZ
2254 left_info->offset + left_info->bytes == offset &&
2255 (!is_trimmed || btrfs_free_space_trimmed(left_info))) {
f333adb5 2256 if (update_stat)
34d52cb6 2257 unlink_free_space(ctl, left_info);
f333adb5 2258 else
34d52cb6 2259 __unlink_free_space(ctl, left_info);
6226cb0a
JB
2260 info->offset = left_info->offset;
2261 info->bytes += left_info->bytes;
dc89e982 2262 kmem_cache_free(btrfs_free_space_cachep, left_info);
120d66ee 2263 merged = true;
0f9dd46c
JB
2264 }
2265
120d66ee
LZ
2266 return merged;
2267}
2268
20005523
FM
2269static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl,
2270 struct btrfs_free_space *info,
2271 bool update_stat)
2272{
2273 struct btrfs_free_space *bitmap;
2274 unsigned long i;
2275 unsigned long j;
2276 const u64 end = info->offset + info->bytes;
2277 const u64 bitmap_offset = offset_to_bitmap(ctl, end);
2278 u64 bytes;
2279
2280 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0);
2281 if (!bitmap)
2282 return false;
2283
2284 i = offset_to_bit(bitmap->offset, ctl->unit, end);
2285 j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i);
2286 if (j == i)
2287 return false;
2288 bytes = (j - i) * ctl->unit;
2289 info->bytes += bytes;
2290
a7ccb255
DZ
2291 /* See try_merge_free_space() comment. */
2292 if (!btrfs_free_space_trimmed(bitmap))
2293 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2294
20005523
FM
2295 if (update_stat)
2296 bitmap_clear_bits(ctl, bitmap, end, bytes);
2297 else
2298 __bitmap_clear_bits(ctl, bitmap, end, bytes);
2299
2300 if (!bitmap->bytes)
2301 free_bitmap(ctl, bitmap);
2302
2303 return true;
2304}
2305
2306static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl,
2307 struct btrfs_free_space *info,
2308 bool update_stat)
2309{
2310 struct btrfs_free_space *bitmap;
2311 u64 bitmap_offset;
2312 unsigned long i;
2313 unsigned long j;
2314 unsigned long prev_j;
2315 u64 bytes;
2316
2317 bitmap_offset = offset_to_bitmap(ctl, info->offset);
2318 /* If we're on a boundary, try the previous logical bitmap. */
2319 if (bitmap_offset == info->offset) {
2320 if (info->offset == 0)
2321 return false;
2322 bitmap_offset = offset_to_bitmap(ctl, info->offset - 1);
2323 }
2324
2325 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0);
2326 if (!bitmap)
2327 return false;
2328
2329 i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1;
2330 j = 0;
2331 prev_j = (unsigned long)-1;
2332 for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) {
2333 if (j > i)
2334 break;
2335 prev_j = j;
2336 }
2337 if (prev_j == i)
2338 return false;
2339
2340 if (prev_j == (unsigned long)-1)
2341 bytes = (i + 1) * ctl->unit;
2342 else
2343 bytes = (i - prev_j) * ctl->unit;
2344
2345 info->offset -= bytes;
2346 info->bytes += bytes;
2347
a7ccb255
DZ
2348 /* See try_merge_free_space() comment. */
2349 if (!btrfs_free_space_trimmed(bitmap))
2350 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2351
20005523
FM
2352 if (update_stat)
2353 bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
2354 else
2355 __bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
2356
2357 if (!bitmap->bytes)
2358 free_bitmap(ctl, bitmap);
2359
2360 return true;
2361}
2362
2363/*
2364 * We prefer always to allocate from extent entries, both for clustered and
2365 * non-clustered allocation requests. So when attempting to add a new extent
2366 * entry, try to see if there's adjacent free space in bitmap entries, and if
2367 * there is, migrate that space from the bitmaps to the extent.
2368 * Like this we get better chances of satisfying space allocation requests
2369 * because we attempt to satisfy them based on a single cache entry, and never
2370 * on 2 or more entries - even if the entries represent a contiguous free space
2371 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry
2372 * ends).
2373 */
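/*
 * Illustration (offsets and sizes are arbitrary): if the new extent entry
 * covers [1MiB, 2MiB) and the bitmap covering 2MiB has 512KiB of contiguous
 * set bits starting at 2MiB, steal_from_bitmap_to_end() clears those bits
 * and the entry grows to [1MiB, 2.5MiB); steal_from_bitmap_to_front() does
 * the same for set bits that end exactly at the entry's start offset.
 */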
2374static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl,
2375 struct btrfs_free_space *info,
2376 bool update_stat)
2377{
2378 /*
2379 * Only work with disconnected entries, as we can change their offset,
2380 * and must be extent entries.
2381 */
2382 ASSERT(!info->bitmap);
2383 ASSERT(RB_EMPTY_NODE(&info->offset_index));
2384
2385 if (ctl->total_bitmaps > 0) {
2386 bool stole_end;
2387 bool stole_front = false;
2388
2389 stole_end = steal_from_bitmap_to_end(ctl, info, update_stat);
2390 if (ctl->total_bitmaps > 0)
2391 stole_front = steal_from_bitmap_to_front(ctl, info,
2392 update_stat);
2393
2394 if (stole_end || stole_front)
2395 try_merge_free_space(ctl, info, update_stat);
2396 }
2397}
2398
ab8d0fc4
JM
2399int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
2400 struct btrfs_free_space_ctl *ctl,
a7ccb255
DZ
2401 u64 offset, u64 bytes,
2402 enum btrfs_trim_state trim_state)
120d66ee 2403{
b0643e59 2404 struct btrfs_block_group *block_group = ctl->private;
120d66ee
LZ
2405 struct btrfs_free_space *info;
2406 int ret = 0;
7fe6d45e 2407 u64 filter_bytes = bytes;
120d66ee 2408
dc89e982 2409 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
120d66ee
LZ
2410 if (!info)
2411 return -ENOMEM;
2412
2413 info->offset = offset;
2414 info->bytes = bytes;
a7ccb255 2415 info->trim_state = trim_state;
20005523 2416 RB_CLEAR_NODE(&info->offset_index);
120d66ee 2417
34d52cb6 2418 spin_lock(&ctl->tree_lock);
120d66ee 2419
34d52cb6 2420 if (try_merge_free_space(ctl, info, true))
120d66ee
LZ
2421 goto link;
2422
2423 /*
2424 * There was no extent directly to the left or right of this new
2425 * extent then we know we're going to have to allocate a new extent, so
2426 * before we do that see if we need to drop this into a bitmap
2427 */
34d52cb6 2428 ret = insert_into_bitmap(ctl, info);
120d66ee
LZ
2429 if (ret < 0) {
2430 goto out;
2431 } else if (ret) {
2432 ret = 0;
2433 goto out;
2434 }
2435link:
20005523
FM
2436 /*
2437 * Only steal free space from adjacent bitmaps if we're sure we're not
2438 * going to add the new free space to existing bitmap entries - because
2439 * that would mean unnecessary work that would be reverted. Therefore
2440 * attempt to steal space from bitmaps if we're adding an extent entry.
2441 */
2442 steal_from_bitmap(ctl, info, true);
2443
7fe6d45e
DZ
2444 filter_bytes = max(filter_bytes, info->bytes);
2445
34d52cb6 2446 ret = link_free_space(ctl, info);
0f9dd46c 2447 if (ret)
dc89e982 2448 kmem_cache_free(btrfs_free_space_cachep, info);
96303081 2449out:
66b53bae 2450 btrfs_discard_update_discardable(block_group);
34d52cb6 2451 spin_unlock(&ctl->tree_lock);
6226cb0a 2452
0f9dd46c 2453 if (ret) {
ab8d0fc4 2454 btrfs_crit(fs_info, "unable to add free space :%d", ret);
b12d6869 2455 ASSERT(ret != -EEXIST);
0f9dd46c
JB
2456 }
2457
7fe6d45e
DZ
2458 if (trim_state != BTRFS_TRIM_STATE_TRIMMED) {
2459 btrfs_discard_check_filter(block_group, filter_bytes);
b0643e59 2460 btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
7fe6d45e 2461 }
b0643e59 2462
0f9dd46c
JB
2463 return ret;
2464}
2465
32da5386 2466int btrfs_add_free_space(struct btrfs_block_group *block_group,
478b4d9f
JB
2467 u64 bytenr, u64 size)
2468{
a7ccb255
DZ
2469 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2470
2471 if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC))
2472 trim_state = BTRFS_TRIM_STATE_TRIMMED;
2473
478b4d9f
JB
2474 return __btrfs_add_free_space(block_group->fs_info,
2475 block_group->free_space_ctl,
a7ccb255 2476 bytenr, size, trim_state);
478b4d9f
JB
2477}
2478
b0643e59
DZ
2479/*
2480 * This is a subtle distinction because when adding free space back in general,
2481 * we want it to be added as untrimmed for async. But in the case where we add
2482 * it on loading of a block group, we want to consider it trimmed.
2483 */
2484int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group,
2485 u64 bytenr, u64 size)
2486{
2487 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2488
2489 if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) ||
2490 btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
2491 trim_state = BTRFS_TRIM_STATE_TRIMMED;
2492
2493 return __btrfs_add_free_space(block_group->fs_info,
2494 block_group->free_space_ctl,
2495 bytenr, size, trim_state);
2496}
2497
32da5386 2498int btrfs_remove_free_space(struct btrfs_block_group *block_group,
6226cb0a 2499 u64 offset, u64 bytes)
0f9dd46c 2500{
34d52cb6 2501 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
0f9dd46c 2502 struct btrfs_free_space *info;
b0175117
JB
2503 int ret;
2504 bool re_search = false;
0f9dd46c 2505
34d52cb6 2506 spin_lock(&ctl->tree_lock);
6226cb0a 2507
96303081 2508again:
b0175117 2509 ret = 0;
bdb7d303
JB
2510 if (!bytes)
2511 goto out_lock;
2512
34d52cb6 2513 info = tree_search_offset(ctl, offset, 0, 0);
96303081 2514 if (!info) {
6606bb97
JB
2515 /*
2516 * oops didn't find an extent that matched the space we wanted
2517 * to remove, look for a bitmap instead
2518 */
34d52cb6 2519 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
6606bb97
JB
2520 1, 0);
2521 if (!info) {
b0175117
JB
2522 /*
2523 * If we found a partial bit of our free space in a
2524 * bitmap but then couldn't find the other part, this may
2525 * be a problem, so WARN about it.
24a70313 2526 */
b0175117 2527 WARN_ON(re_search);
6606bb97
JB
2528 goto out_lock;
2529 }
96303081
JB
2530 }
2531
b0175117 2532 re_search = false;
bdb7d303 2533 if (!info->bitmap) {
34d52cb6 2534 unlink_free_space(ctl, info);
bdb7d303
JB
2535 if (offset == info->offset) {
2536 u64 to_free = min(bytes, info->bytes);
2537
2538 info->bytes -= to_free;
2539 info->offset += to_free;
2540 if (info->bytes) {
2541 ret = link_free_space(ctl, info);
2542 WARN_ON(ret);
2543 } else {
2544 kmem_cache_free(btrfs_free_space_cachep, info);
2545 }
0f9dd46c 2546
bdb7d303
JB
2547 offset += to_free;
2548 bytes -= to_free;
2549 goto again;
2550 } else {
2551 u64 old_end = info->bytes + info->offset;
9b49c9b9 2552
bdb7d303 2553 info->bytes = offset - info->offset;
34d52cb6 2554 ret = link_free_space(ctl, info);
96303081
JB
2555 WARN_ON(ret);
2556 if (ret)
2557 goto out_lock;
96303081 2558
bdb7d303
JB
2559 /* Not enough bytes in this entry to satisfy us */
2560 if (old_end < offset + bytes) {
2561 bytes -= old_end - offset;
2562 offset = old_end;
2563 goto again;
2564 } else if (old_end == offset + bytes) {
2565 /* all done */
2566 goto out_lock;
2567 }
2568 spin_unlock(&ctl->tree_lock);
2569
a7ccb255
DZ
2570 ret = __btrfs_add_free_space(block_group->fs_info, ctl,
2571 offset + bytes,
2572 old_end - (offset + bytes),
2573 info->trim_state);
bdb7d303
JB
2574 WARN_ON(ret);
2575 goto out;
2576 }
0f9dd46c 2577 }
96303081 2578
34d52cb6 2579 ret = remove_from_bitmap(ctl, info, &offset, &bytes);
b0175117
JB
2580 if (ret == -EAGAIN) {
2581 re_search = true;
96303081 2582 goto again;
b0175117 2583 }
96303081 2584out_lock:
66b53bae 2585 btrfs_discard_update_discardable(block_group);
34d52cb6 2586 spin_unlock(&ctl->tree_lock);
0f9dd46c 2587out:
25179201
JB
2588 return ret;
2589}
2590
32da5386 2591void btrfs_dump_free_space(struct btrfs_block_group *block_group,
0f9dd46c
JB
2592 u64 bytes)
2593{
0b246afa 2594 struct btrfs_fs_info *fs_info = block_group->fs_info;
34d52cb6 2595 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
0f9dd46c
JB
2596 struct btrfs_free_space *info;
2597 struct rb_node *n;
2598 int count = 0;
2599
9084cb6a 2600 spin_lock(&ctl->tree_lock);
34d52cb6 2601 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
0f9dd46c 2602 info = rb_entry(n, struct btrfs_free_space, offset_index);
f6175efa 2603 if (info->bytes >= bytes && !block_group->ro)
0f9dd46c 2604 count++;
0b246afa 2605 btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s",
efe120a0 2606 info->offset, info->bytes,
96303081 2607 (info->bitmap) ? "yes" : "no");
0f9dd46c 2608 }
9084cb6a 2609 spin_unlock(&ctl->tree_lock);
0b246afa 2610 btrfs_info(fs_info, "block group has cluster?: %s",
96303081 2611 list_empty(&block_group->cluster_list) ? "no" : "yes");
0b246afa 2612 btrfs_info(fs_info,
efe120a0 2613 "%d blocks of free space at or bigger than bytes is", count);
0f9dd46c
JB
2614}
2615
cd79909b
JB
2616void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group,
2617 struct btrfs_free_space_ctl *ctl)
0f9dd46c 2618{
0b246afa 2619 struct btrfs_fs_info *fs_info = block_group->fs_info;
0f9dd46c 2620
34d52cb6 2621 spin_lock_init(&ctl->tree_lock);
0b246afa 2622 ctl->unit = fs_info->sectorsize;
b3470b5d 2623 ctl->start = block_group->start;
34d52cb6
LZ
2624 ctl->private = block_group;
2625 ctl->op = &free_space_op;
55507ce3
FM
2626 INIT_LIST_HEAD(&ctl->trimming_ranges);
2627 mutex_init(&ctl->cache_writeout_mutex);
0f9dd46c 2628
34d52cb6
LZ
2629 /*
2630 * we only want to have 32k of ram per block group for keeping
2631 * track of free space, and if we pass 1/2 of that we want to
2632 * start converting things over to using bitmaps
2633 */
ee22184b 2634 ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space);
0f9dd46c
JB
2635}
2636
fa9c0d79
CM
2637/*
2638 * for a given cluster, put all of its extents back into the free
2639 * space cache. If the block group passed doesn't match the block group
2640 * pointed to by the cluster, someone else raced in and freed the
2641 * cluster already. In that case, we just return without changing anything
2642 */
69b0e093 2643static void __btrfs_return_cluster_to_free_space(
32da5386 2644 struct btrfs_block_group *block_group,
fa9c0d79
CM
2645 struct btrfs_free_cluster *cluster)
2646{
34d52cb6 2647 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
fa9c0d79
CM
2648 struct btrfs_free_space *entry;
2649 struct rb_node *node;
2650
2651 spin_lock(&cluster->lock);
2652 if (cluster->block_group != block_group)
2653 goto out;
2654
96303081 2655 cluster->block_group = NULL;
fa9c0d79 2656 cluster->window_start = 0;
96303081 2657 list_del_init(&cluster->block_group_list);
96303081 2658
fa9c0d79 2659 node = rb_first(&cluster->root);
96303081 2660 while (node) {
4e69b598
JB
2661 bool bitmap;
2662
fa9c0d79
CM
2663 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2664 node = rb_next(&entry->offset_index);
2665 rb_erase(&entry->offset_index, &cluster->root);
20005523 2666 RB_CLEAR_NODE(&entry->offset_index);
4e69b598
JB
2667
2668 bitmap = (entry->bitmap != NULL);
20005523 2669 if (!bitmap) {
dfb79ddb 2670 /* Merging treats extents as if they were new */
5dc7c10b 2671 if (!btrfs_free_space_trimmed(entry)) {
dfb79ddb 2672 ctl->discardable_extents[BTRFS_STAT_CURR]--;
5dc7c10b
DZ
2673 ctl->discardable_bytes[BTRFS_STAT_CURR] -=
2674 entry->bytes;
2675 }
dfb79ddb 2676
34d52cb6 2677 try_merge_free_space(ctl, entry, false);
20005523 2678 steal_from_bitmap(ctl, entry, false);
dfb79ddb
DZ
2679
2680 /* As we insert directly, update these statistics */
5dc7c10b 2681 if (!btrfs_free_space_trimmed(entry)) {
dfb79ddb 2682 ctl->discardable_extents[BTRFS_STAT_CURR]++;
5dc7c10b
DZ
2683 ctl->discardable_bytes[BTRFS_STAT_CURR] +=
2684 entry->bytes;
2685 }
20005523 2686 }
34d52cb6 2687 tree_insert_offset(&ctl->free_space_offset,
4e69b598 2688 entry->offset, &entry->offset_index, bitmap);
fa9c0d79 2689 }
6bef4d31 2690 cluster->root = RB_ROOT;
96303081 2691
fa9c0d79
CM
2692out:
2693 spin_unlock(&cluster->lock);
96303081 2694 btrfs_put_block_group(block_group);
fa9c0d79
CM
2695}
2696
48a3b636
ES
2697static void __btrfs_remove_free_space_cache_locked(
2698 struct btrfs_free_space_ctl *ctl)
0f9dd46c
JB
2699{
2700 struct btrfs_free_space *info;
2701 struct rb_node *node;
581bb050 2702
581bb050
LZ
2703 while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
2704 info = rb_entry(node, struct btrfs_free_space, offset_index);
9b90f513
JB
2705 if (!info->bitmap) {
2706 unlink_free_space(ctl, info);
2707 kmem_cache_free(btrfs_free_space_cachep, info);
2708 } else {
2709 free_bitmap(ctl, info);
2710 }
351810c1
DS
2711
2712 cond_resched_lock(&ctl->tree_lock);
581bb050 2713 }
09655373
CM
2714}
2715
2716void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
2717{
2718 spin_lock(&ctl->tree_lock);
2719 __btrfs_remove_free_space_cache_locked(ctl);
27f0afc7 2720 if (ctl->private)
66b53bae 2721 btrfs_discard_update_discardable(ctl->private);
581bb050
LZ
2722 spin_unlock(&ctl->tree_lock);
2723}
2724
32da5386 2725void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group)
581bb050
LZ
2726{
2727 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
fa9c0d79 2728 struct btrfs_free_cluster *cluster;
96303081 2729 struct list_head *head;
0f9dd46c 2730
34d52cb6 2731 spin_lock(&ctl->tree_lock);
96303081
JB
2732 while ((head = block_group->cluster_list.next) !=
2733 &block_group->cluster_list) {
2734 cluster = list_entry(head, struct btrfs_free_cluster,
2735 block_group_list);
fa9c0d79
CM
2736
2737 WARN_ON(cluster->block_group != block_group);
2738 __btrfs_return_cluster_to_free_space(block_group, cluster);
351810c1
DS
2739
2740 cond_resched_lock(&ctl->tree_lock);
fa9c0d79 2741 }
09655373 2742 __btrfs_remove_free_space_cache_locked(ctl);
66b53bae 2743 btrfs_discard_update_discardable(block_group);
34d52cb6 2744 spin_unlock(&ctl->tree_lock);
fa9c0d79 2745
0f9dd46c
JB
2746}
2747
6e80d4f8
DZ
2748/**
2749 * btrfs_is_free_space_trimmed - see if everything is trimmed
2750 * @block_group: block_group of interest
2751 *
2752 * Walk @block_group's free space rb_tree to determine if everything is trimmed.
2753 */
2754bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group)
2755{
2756 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2757 struct btrfs_free_space *info;
2758 struct rb_node *node;
2759 bool ret = true;
2760
2761 spin_lock(&ctl->tree_lock);
2762 node = rb_first(&ctl->free_space_offset);
2763
2764 while (node) {
2765 info = rb_entry(node, struct btrfs_free_space, offset_index);
2766
2767 if (!btrfs_free_space_trimmed(info)) {
2768 ret = false;
2769 break;
2770 }
2771
2772 node = rb_next(node);
2773 }
2774
2775 spin_unlock(&ctl->tree_lock);
2776 return ret;
2777}
2778
32da5386 2779u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
a4820398
MX
2780 u64 offset, u64 bytes, u64 empty_size,
2781 u64 *max_extent_size)
0f9dd46c 2782{
34d52cb6 2783 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
9ddf648f
DZ
2784 struct btrfs_discard_ctl *discard_ctl =
2785 &block_group->fs_info->discard_ctl;
6226cb0a 2786 struct btrfs_free_space *entry = NULL;
96303081 2787 u64 bytes_search = bytes + empty_size;
6226cb0a 2788 u64 ret = 0;
53b381b3
DW
2789 u64 align_gap = 0;
2790 u64 align_gap_len = 0;
a7ccb255 2791 enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
0f9dd46c 2792
34d52cb6 2793 spin_lock(&ctl->tree_lock);
53b381b3 2794 entry = find_free_space(ctl, &offset, &bytes_search,
a4820398 2795 block_group->full_stripe_len, max_extent_size);
6226cb0a 2796 if (!entry)
96303081
JB
2797 goto out;
2798
2799 ret = offset;
2800 if (entry->bitmap) {
34d52cb6 2801 bitmap_clear_bits(ctl, entry, offset, bytes);
9ddf648f
DZ
2802
2803 if (!btrfs_free_space_trimmed(entry))
2804 atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
2805
edf6e2d1 2806 if (!entry->bytes)
34d52cb6 2807 free_bitmap(ctl, entry);
96303081 2808 } else {
34d52cb6 2809 unlink_free_space(ctl, entry);
53b381b3
DW
2810 align_gap_len = offset - entry->offset;
2811 align_gap = entry->offset;
a7ccb255 2812 align_gap_trim_state = entry->trim_state;
53b381b3 2813
9ddf648f
DZ
2814 if (!btrfs_free_space_trimmed(entry))
2815 atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
2816
53b381b3
DW
2817 entry->offset = offset + bytes;
2818 WARN_ON(entry->bytes < bytes + align_gap_len);
2819
2820 entry->bytes -= bytes + align_gap_len;
6226cb0a 2821 if (!entry->bytes)
dc89e982 2822 kmem_cache_free(btrfs_free_space_cachep, entry);
6226cb0a 2823 else
34d52cb6 2824 link_free_space(ctl, entry);
6226cb0a 2825 }
96303081 2826out:
66b53bae 2827 btrfs_discard_update_discardable(block_group);
34d52cb6 2828 spin_unlock(&ctl->tree_lock);
817d52f8 2829
53b381b3 2830 if (align_gap_len)
ab8d0fc4 2831 __btrfs_add_free_space(block_group->fs_info, ctl,
a7ccb255
DZ
2832 align_gap, align_gap_len,
2833 align_gap_trim_state);
0f9dd46c
JB
2834 return ret;
2835}
fa9c0d79
CM
2836
2837/*
2838 * given a cluster, put all of its extents back into the free space
2839 * cache. If a block group is passed, this function will only free
2840 * a cluster that belongs to the passed block group.
2841 *
2842 * Otherwise, it'll get a reference on the block group pointed to by the
2843 * cluster and remove the cluster from it.
2844 */
69b0e093 2845void btrfs_return_cluster_to_free_space(
32da5386 2846 struct btrfs_block_group *block_group,
fa9c0d79
CM
2847 struct btrfs_free_cluster *cluster)
2848{
34d52cb6 2849 struct btrfs_free_space_ctl *ctl;
fa9c0d79
CM
2850
2851 /* first, get a safe pointer to the block group */
2852 spin_lock(&cluster->lock);
2853 if (!block_group) {
2854 block_group = cluster->block_group;
2855 if (!block_group) {
2856 spin_unlock(&cluster->lock);
69b0e093 2857 return;
fa9c0d79
CM
2858 }
2859 } else if (cluster->block_group != block_group) {
2860 /* someone else has already freed it, don't redo their work */
2861 spin_unlock(&cluster->lock);
69b0e093 2862 return;
fa9c0d79 2863 }
b5790d51 2864 btrfs_get_block_group(block_group);
fa9c0d79
CM
2865 spin_unlock(&cluster->lock);
2866
34d52cb6
LZ
2867 ctl = block_group->free_space_ctl;
2868
fa9c0d79 2869 /* now return any extents the cluster had on it */
34d52cb6 2870 spin_lock(&ctl->tree_lock);
69b0e093 2871 __btrfs_return_cluster_to_free_space(block_group, cluster);
34d52cb6 2872 spin_unlock(&ctl->tree_lock);
fa9c0d79 2873
6e80d4f8
DZ
2874 btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group);
2875
fa9c0d79
CM
2876 /* finally drop our ref */
2877 btrfs_put_block_group(block_group);
fa9c0d79
CM
2878}
2879
32da5386 2880static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group,
96303081 2881 struct btrfs_free_cluster *cluster,
4e69b598 2882 struct btrfs_free_space *entry,
a4820398
MX
2883 u64 bytes, u64 min_start,
2884 u64 *max_extent_size)
96303081 2885{
34d52cb6 2886 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
96303081
JB
2887 int err;
2888 u64 search_start = cluster->window_start;
2889 u64 search_bytes = bytes;
2890 u64 ret = 0;
2891
96303081
JB
2892 search_start = min_start;
2893 search_bytes = bytes;
2894
0584f718 2895 err = search_bitmap(ctl, entry, &search_start, &search_bytes, true);
a4820398 2896 if (err) {
ad22cf6e
JB
2897 *max_extent_size = max(get_max_extent_size(entry),
2898 *max_extent_size);
4e69b598 2899 return 0;
a4820398 2900 }
96303081
JB
2901
2902 ret = search_start;
bb3ac5a4 2903 __bitmap_clear_bits(ctl, entry, ret, bytes);
96303081
JB
2904
2905 return ret;
2906}
2907
fa9c0d79
CM
2908/*
2909 * given a cluster, try to allocate 'bytes' from it, returns 0
2910 * if it couldn't find anything suitably large, or a logical disk offset
2911 * if things worked out
2912 */
32da5386 2913u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
fa9c0d79 2914 struct btrfs_free_cluster *cluster, u64 bytes,
a4820398 2915 u64 min_start, u64 *max_extent_size)
fa9c0d79 2916{
34d52cb6 2917 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
9ddf648f
DZ
2918 struct btrfs_discard_ctl *discard_ctl =
2919 &block_group->fs_info->discard_ctl;
fa9c0d79
CM
2920 struct btrfs_free_space *entry = NULL;
2921 struct rb_node *node;
2922 u64 ret = 0;
2923
2924 spin_lock(&cluster->lock);
2925 if (bytes > cluster->max_size)
2926 goto out;
2927
2928 if (cluster->block_group != block_group)
2929 goto out;
2930
2931 node = rb_first(&cluster->root);
2932 if (!node)
2933 goto out;
2934
2935 entry = rb_entry(node, struct btrfs_free_space, offset_index);
67871254 2936 while (1) {
ad22cf6e
JB
2937 if (entry->bytes < bytes)
2938 *max_extent_size = max(get_max_extent_size(entry),
2939 *max_extent_size);
a4820398 2940
4e69b598
JB
2941 if (entry->bytes < bytes ||
2942 (!entry->bitmap && entry->offset < min_start)) {
fa9c0d79
CM
2943 node = rb_next(&entry->offset_index);
2944 if (!node)
2945 break;
2946 entry = rb_entry(node, struct btrfs_free_space,
2947 offset_index);
2948 continue;
2949 }
fa9c0d79 2950
4e69b598
JB
2951 if (entry->bitmap) {
2952 ret = btrfs_alloc_from_bitmap(block_group,
2953 cluster, entry, bytes,
a4820398
MX
2954 cluster->window_start,
2955 max_extent_size);
4e69b598 2956 if (ret == 0) {
4e69b598
JB
2957 node = rb_next(&entry->offset_index);
2958 if (!node)
2959 break;
2960 entry = rb_entry(node, struct btrfs_free_space,
2961 offset_index);
2962 continue;
2963 }
9b230628 2964 cluster->window_start += bytes;
4e69b598 2965 } else {
4e69b598
JB
2966 ret = entry->offset;
2967
2968 entry->offset += bytes;
2969 entry->bytes -= bytes;
2970 }
fa9c0d79 2971
5e71b5d5 2972 if (entry->bytes == 0)
fa9c0d79 2973 rb_erase(&entry->offset_index, &cluster->root);
fa9c0d79
CM
2974 break;
2975 }
2976out:
2977 spin_unlock(&cluster->lock);
96303081 2978
5e71b5d5
LZ
2979 if (!ret)
2980 return 0;
2981
34d52cb6 2982 spin_lock(&ctl->tree_lock);
5e71b5d5 2983
9ddf648f
DZ
2984 if (!btrfs_free_space_trimmed(entry))
2985 atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
2986
34d52cb6 2987 ctl->free_space -= bytes;
5dc7c10b
DZ
2988 if (!entry->bitmap && !btrfs_free_space_trimmed(entry))
2989 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
5e71b5d5 2990 if (entry->bytes == 0) {
34d52cb6 2991 ctl->free_extents--;
4e69b598 2992 if (entry->bitmap) {
3acd4850
CL
2993 kmem_cache_free(btrfs_free_space_bitmap_cachep,
2994 entry->bitmap);
34d52cb6
LZ
2995 ctl->total_bitmaps--;
2996 ctl->op->recalc_thresholds(ctl);
dfb79ddb
DZ
2997 } else if (!btrfs_free_space_trimmed(entry)) {
2998 ctl->discardable_extents[BTRFS_STAT_CURR]--;
4e69b598 2999 }
dc89e982 3000 kmem_cache_free(btrfs_free_space_cachep, entry);
5e71b5d5
LZ
3001 }
3002
34d52cb6 3003 spin_unlock(&ctl->tree_lock);
5e71b5d5 3004
fa9c0d79
CM
3005 return ret;
3006}
3007
32da5386 3008static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group,
96303081
JB
3009 struct btrfs_free_space *entry,
3010 struct btrfs_free_cluster *cluster,
1bb91902
AO
3011 u64 offset, u64 bytes,
3012 u64 cont1_bytes, u64 min_bytes)
96303081 3013{
34d52cb6 3014 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
96303081
JB
3015 unsigned long next_zero;
3016 unsigned long i;
1bb91902
AO
3017 unsigned long want_bits;
3018 unsigned long min_bits;
96303081 3019 unsigned long found_bits;
cef40483 3020 unsigned long max_bits = 0;
96303081
JB
3021 unsigned long start = 0;
3022 unsigned long total_found = 0;
4e69b598 3023 int ret;
96303081 3024
96009762 3025 i = offset_to_bit(entry->offset, ctl->unit,
96303081 3026 max_t(u64, offset, entry->offset));
96009762
WSH
3027 want_bits = bytes_to_bits(bytes, ctl->unit);
3028 min_bits = bytes_to_bits(min_bytes, ctl->unit);
96303081 3029
cef40483
JB
3030 /*
3031 * Don't bother looking for a cluster in this bitmap if it's heavily
3032 * fragmented.
3033 */
3034 if (entry->max_extent_size &&
3035 entry->max_extent_size < cont1_bytes)
3036 return -ENOSPC;
96303081
JB
3037again:
3038 found_bits = 0;
ebb3dad4 3039 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) {
96303081
JB
3040 next_zero = find_next_zero_bit(entry->bitmap,
3041 BITS_PER_BITMAP, i);
1bb91902 3042 if (next_zero - i >= min_bits) {
96303081 3043 found_bits = next_zero - i;
cef40483
JB
3044 if (found_bits > max_bits)
3045 max_bits = found_bits;
96303081
JB
3046 break;
3047 }
cef40483
JB
3048 if (next_zero - i > max_bits)
3049 max_bits = next_zero - i;
96303081
JB
3050 i = next_zero;
3051 }
3052
cef40483
JB
3053 if (!found_bits) {
3054 entry->max_extent_size = (u64)max_bits * ctl->unit;
4e69b598 3055 return -ENOSPC;
cef40483 3056 }
96303081 3057
1bb91902 3058 if (!total_found) {
96303081 3059 start = i;
b78d09bc 3060 cluster->max_size = 0;
96303081
JB
3061 }
3062
3063 total_found += found_bits;
3064
96009762
WSH
3065 if (cluster->max_size < found_bits * ctl->unit)
3066 cluster->max_size = found_bits * ctl->unit;
96303081 3067
1bb91902
AO
3068 if (total_found < want_bits || cluster->max_size < cont1_bytes) {
3069 i = next_zero + 1;
96303081
JB
3070 goto again;
3071 }
3072
96009762 3073 cluster->window_start = start * ctl->unit + entry->offset;
34d52cb6 3074 rb_erase(&entry->offset_index, &ctl->free_space_offset);
4e69b598
JB
3075 ret = tree_insert_offset(&cluster->root, entry->offset,
3076 &entry->offset_index, 1);
b12d6869 3077 ASSERT(!ret); /* -EEXIST; Logic error */
96303081 3078
3f7de037 3079 trace_btrfs_setup_cluster(block_group, cluster,
96009762 3080 total_found * ctl->unit, 1);
96303081
JB
3081 return 0;
3082}
3083
4e69b598
JB
3084/*
3085 * This searches the block group for just extents to fill the cluster with.
1bb91902
AO
3086 * Try to find a cluster with at least bytes total bytes, at least one
3087 * extent of cont1_bytes, and other extents of at least min_bytes.
4e69b598 3088 */
3de85bb9 3089static noinline int
32da5386 3090setup_cluster_no_bitmap(struct btrfs_block_group *block_group,
3de85bb9
JB
3091 struct btrfs_free_cluster *cluster,
3092 struct list_head *bitmaps, u64 offset, u64 bytes,
1bb91902 3093 u64 cont1_bytes, u64 min_bytes)
4e69b598 3094{
34d52cb6 3095 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
4e69b598
JB
3096 struct btrfs_free_space *first = NULL;
3097 struct btrfs_free_space *entry = NULL;
4e69b598
JB
3098 struct btrfs_free_space *last;
3099 struct rb_node *node;
4e69b598
JB
3100 u64 window_free;
3101 u64 max_extent;
3f7de037 3102 u64 total_size = 0;
4e69b598 3103
34d52cb6 3104 entry = tree_search_offset(ctl, offset, 0, 1);
4e69b598
JB
3105 if (!entry)
3106 return -ENOSPC;
3107
3108 /*
3109 * We don't want bitmaps, so just move along until we find a normal
3110 * extent entry.
3111 */
1bb91902
AO
3112 while (entry->bitmap || entry->bytes < min_bytes) {
3113 if (entry->bitmap && list_empty(&entry->list))
86d4a77b 3114 list_add_tail(&entry->list, bitmaps);
4e69b598
JB
3115 node = rb_next(&entry->offset_index);
3116 if (!node)
3117 return -ENOSPC;
3118 entry = rb_entry(node, struct btrfs_free_space, offset_index);
3119 }
3120
4e69b598
JB
3121 window_free = entry->bytes;
3122 max_extent = entry->bytes;
3123 first = entry;
3124 last = entry;
4e69b598 3125
1bb91902
AO
3126 for (node = rb_next(&entry->offset_index); node;
3127 node = rb_next(&entry->offset_index)) {
4e69b598
JB
3128 entry = rb_entry(node, struct btrfs_free_space, offset_index);
3129
86d4a77b
JB
3130 if (entry->bitmap) {
3131 if (list_empty(&entry->list))
3132 list_add_tail(&entry->list, bitmaps);
4e69b598 3133 continue;
86d4a77b
JB
3134 }
3135
1bb91902
AO
3136 if (entry->bytes < min_bytes)
3137 continue;
3138
3139 last = entry;
3140 window_free += entry->bytes;
3141 if (entry->bytes > max_extent)
4e69b598 3142 max_extent = entry->bytes;
4e69b598
JB
3143 }
3144
1bb91902
AO
3145 if (window_free < bytes || max_extent < cont1_bytes)
3146 return -ENOSPC;
3147
4e69b598
JB
3148 cluster->window_start = first->offset;
3149
3150 node = &first->offset_index;
3151
3152 /*
3153 * now we've found our entries, pull them out of the free space
3154 * cache and put them into the cluster rbtree
3155 */
3156 do {
3157 int ret;
3158
3159 entry = rb_entry(node, struct btrfs_free_space, offset_index);
3160 node = rb_next(&entry->offset_index);
1bb91902 3161 if (entry->bitmap || entry->bytes < min_bytes)
4e69b598
JB
3162 continue;
3163
34d52cb6 3164 rb_erase(&entry->offset_index, &ctl->free_space_offset);
4e69b598
JB
3165 ret = tree_insert_offset(&cluster->root, entry->offset,
3166 &entry->offset_index, 0);
3f7de037 3167 total_size += entry->bytes;
b12d6869 3168 ASSERT(!ret); /* -EEXIST; Logic error */
4e69b598
JB
3169 } while (node && entry != last);
3170
3171 cluster->max_size = max_extent;
3f7de037 3172 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
4e69b598
JB
3173 return 0;
3174}
3175
3176/*
3177 * This specifically looks for bitmaps that may work in the cluster; we assume
3178 * that we have already failed to find extents that will work.
3179 */
3de85bb9 3180static noinline int
32da5386 3181setup_cluster_bitmap(struct btrfs_block_group *block_group,
3de85bb9
JB
3182 struct btrfs_free_cluster *cluster,
3183 struct list_head *bitmaps, u64 offset, u64 bytes,
1bb91902 3184 u64 cont1_bytes, u64 min_bytes)
4e69b598 3185{
34d52cb6 3186 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1b9b922a 3187 struct btrfs_free_space *entry = NULL;
4e69b598 3188 int ret = -ENOSPC;
0f0fbf1d 3189 u64 bitmap_offset = offset_to_bitmap(ctl, offset);
4e69b598 3190
34d52cb6 3191 if (ctl->total_bitmaps == 0)
4e69b598
JB
3192 return -ENOSPC;
3193
0f0fbf1d
LZ
3194 /*
3195 * The bitmap that covers offset won't be in the list unless offset
3196 * is just its start offset.
3197 */
1b9b922a
CM
3198 if (!list_empty(bitmaps))
3199 entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
3200
3201 if (!entry || entry->offset != bitmap_offset) {
0f0fbf1d
LZ
3202 entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
3203 if (entry && list_empty(&entry->list))
3204 list_add(&entry->list, bitmaps);
3205 }
3206
86d4a77b 3207 list_for_each_entry(entry, bitmaps, list) {
357b9784 3208 if (entry->bytes < bytes)
86d4a77b
JB
3209 continue;
3210 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
1bb91902 3211 bytes, cont1_bytes, min_bytes);
86d4a77b
JB
3212 if (!ret)
3213 return 0;
3214 }
3215
3216 /*
52621cb6
LZ
3217 * The bitmaps list has all the bitmaps that record free space
3218 * starting after offset, so no more search is required.
86d4a77b 3219 */
52621cb6 3220 return -ENOSPC;
4e69b598
JB
3221}
3222
fa9c0d79
CM
3223/*
3224 * here we try to find a cluster of blocks in a block group. The goal
1bb91902 3225 * is to find at least bytes+empty_size.
fa9c0d79
CM
3226 * We might not find them all in one contiguous area.
3227 *
3228 * returns zero and sets up cluster if things worked out, otherwise
3229 * it returns -enospc
3230 */
32da5386 3231int btrfs_find_space_cluster(struct btrfs_block_group *block_group,
fa9c0d79
CM
3232 struct btrfs_free_cluster *cluster,
3233 u64 offset, u64 bytes, u64 empty_size)
3234{
2ceeae2e 3235 struct btrfs_fs_info *fs_info = block_group->fs_info;
34d52cb6 3236 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
86d4a77b 3237 struct btrfs_free_space *entry, *tmp;
52621cb6 3238 LIST_HEAD(bitmaps);
fa9c0d79 3239 u64 min_bytes;
1bb91902 3240 u64 cont1_bytes;
fa9c0d79
CM
3241 int ret;
3242
1bb91902
AO
3243 /*
3244 * Choose the minimum extent size we'll require for this
3245 * cluster. For SSD_SPREAD, don't allow any fragmentation.
3246 * For metadata, allow allocations with smaller extents. For
3247 * data, keep it dense.
3248 */
0b246afa 3249 if (btrfs_test_opt(fs_info, SSD_SPREAD)) {
1bb91902 3250 cont1_bytes = min_bytes = bytes + empty_size;
451d7585 3251 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
1bb91902 3252 cont1_bytes = bytes;
0b246afa 3253 min_bytes = fs_info->sectorsize;
1bb91902
AO
3254 } else {
3255 cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
0b246afa 3256 min_bytes = fs_info->sectorsize;
1bb91902 3257 }
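	/*
	 * Example (a sketch assuming a 4KiB sectorsize): a data block group
	 * asked for bytes = 256KiB with empty_size = 768KiB gets
	 * cont1_bytes = max(256K, 1M >> 2) = 256KiB and min_bytes = 4KiB, so
	 * the cluster built below must total 1MiB, include at least one
	 * 256KiB contiguous extent, and count only free extents of at least
	 * 4KiB toward it.
	 */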
fa9c0d79 3258
34d52cb6 3259 spin_lock(&ctl->tree_lock);
7d0d2e8e
JB
3260
3261 /*
3262 * If we know we don't have enough space to make a cluster don't even
3263 * bother doing all the work to try and find one.
3264 */
1bb91902 3265 if (ctl->free_space < bytes) {
34d52cb6 3266 spin_unlock(&ctl->tree_lock);
7d0d2e8e
JB
3267 return -ENOSPC;
3268 }
3269
fa9c0d79
CM
3270 spin_lock(&cluster->lock);
3271
3272 /* someone already found a cluster, hooray */
3273 if (cluster->block_group) {
3274 ret = 0;
3275 goto out;
3276 }
fa9c0d79 3277
3f7de037
JB
3278 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
3279 min_bytes);
3280
86d4a77b 3281 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
1bb91902
AO
3282 bytes + empty_size,
3283 cont1_bytes, min_bytes);
4e69b598 3284 if (ret)
86d4a77b 3285 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
1bb91902
AO
3286 offset, bytes + empty_size,
3287 cont1_bytes, min_bytes);
86d4a77b
JB
3288
3289 /* Clear our temporary list */
3290 list_for_each_entry_safe(entry, tmp, &bitmaps, list)
3291 list_del_init(&entry->list);
fa9c0d79 3292
4e69b598 3293 if (!ret) {
b5790d51 3294 btrfs_get_block_group(block_group);
4e69b598
JB
3295 list_add_tail(&cluster->block_group_list,
3296 &block_group->cluster_list);
3297 cluster->block_group = block_group;
3f7de037
JB
3298 } else {
3299 trace_btrfs_failed_cluster_setup(block_group);
fa9c0d79 3300 }
fa9c0d79
CM
3301out:
3302 spin_unlock(&cluster->lock);
34d52cb6 3303 spin_unlock(&ctl->tree_lock);
fa9c0d79
CM
3304
3305 return ret;
3306}
3307
3308/*
3309 * simple code to zero out a cluster
3310 */
3311void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
3312{
3313 spin_lock_init(&cluster->lock);
3314 spin_lock_init(&cluster->refill_lock);
6bef4d31 3315 cluster->root = RB_ROOT;
fa9c0d79 3316 cluster->max_size = 0;
c759c4e1 3317 cluster->fragmented = false;
fa9c0d79
CM
3318 INIT_LIST_HEAD(&cluster->block_group_list);
3319 cluster->block_group = NULL;
3320}
3321
32da5386 3322static int do_trimming(struct btrfs_block_group *block_group,
7fe1e641 3323 u64 *total_trimmed, u64 start, u64 bytes,
55507ce3 3324 u64 reserved_start, u64 reserved_bytes,
b0643e59 3325 enum btrfs_trim_state reserved_trim_state,
55507ce3 3326 struct btrfs_trim_range *trim_entry)
f7039b1d 3327{
7fe1e641 3328 struct btrfs_space_info *space_info = block_group->space_info;
f7039b1d 3329 struct btrfs_fs_info *fs_info = block_group->fs_info;
55507ce3 3330 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
7fe1e641
LZ
3331 int ret;
3332 int update = 0;
b0643e59
DZ
3333 const u64 end = start + bytes;
3334 const u64 reserved_end = reserved_start + reserved_bytes;
3335 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
7fe1e641 3336 u64 trimmed = 0;
f7039b1d 3337
7fe1e641
LZ
3338 spin_lock(&space_info->lock);
3339 spin_lock(&block_group->lock);
3340 if (!block_group->ro) {
3341 block_group->reserved += reserved_bytes;
3342 space_info->bytes_reserved += reserved_bytes;
3343 update = 1;
3344 }
3345 spin_unlock(&block_group->lock);
3346 spin_unlock(&space_info->lock);
3347
2ff7e61e 3348 ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed);
b0643e59 3349 if (!ret) {
7fe1e641 3350 *total_trimmed += trimmed;
b0643e59
DZ
3351 trim_state = BTRFS_TRIM_STATE_TRIMMED;
3352 }
7fe1e641 3353
55507ce3 3354 mutex_lock(&ctl->cache_writeout_mutex);
b0643e59
DZ
3355 if (reserved_start < start)
3356 __btrfs_add_free_space(fs_info, ctl, reserved_start,
3357 start - reserved_start,
3358 reserved_trim_state);
3359 if (start + bytes < reserved_start + reserved_bytes)
3360 __btrfs_add_free_space(fs_info, ctl, end, reserved_end - end,
3361 reserved_trim_state);
3362 __btrfs_add_free_space(fs_info, ctl, start, bytes, trim_state);
55507ce3
FM
3363 list_del(&trim_entry->list);
3364 mutex_unlock(&ctl->cache_writeout_mutex);
7fe1e641
LZ
3365
3366 if (update) {
3367 spin_lock(&space_info->lock);
3368 spin_lock(&block_group->lock);
3369 if (block_group->ro)
3370 space_info->bytes_readonly += reserved_bytes;
3371 block_group->reserved -= reserved_bytes;
3372 space_info->bytes_reserved -= reserved_bytes;
7fe1e641 3373 spin_unlock(&block_group->lock);
8f63a840 3374 spin_unlock(&space_info->lock);
7fe1e641
LZ
3375 }
3376
3377 return ret;
3378}
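/*
 * Illustrative userspace sketch (not kernel code) of the bookkeeping in
 * do_trimming() above: the reserved range is handed back to the free space
 * tree as an optional head and tail that keep the state they were reserved
 * with, plus the middle that was actually discarded.  The range struct and
 * the re_add() callback are invented for this sketch.
 */
#include <stdint.h>
#include <stdio.h>

struct range {
	uint64_t start;
	uint64_t len;
	int trimmed;
};

static void return_reserved(uint64_t reserved_start, uint64_t reserved_bytes,
			    uint64_t start, uint64_t bytes,
			    int reserved_trimmed, int trimmed,
			    void (*re_add)(struct range))
{
	const uint64_t end = start + bytes;
	const uint64_t reserved_end = reserved_start + reserved_bytes;

	if (reserved_start < start)	/* untouched head keeps its old state */
		re_add((struct range){ reserved_start, start - reserved_start,
				       reserved_trimmed });
	if (end < reserved_end)		/* untouched tail keeps its old state */
		re_add((struct range){ end, reserved_end - end,
				       reserved_trimmed });
	/* the part we discarded comes back with the new state */
	re_add((struct range){ start, bytes, trimmed });
}

static void print_range(struct range r)
{
	printf("re-add %llu+%llu trimmed=%d\n",
	       (unsigned long long)r.start, (unsigned long long)r.len,
	       r.trimmed);
}

int main(void)
{
	/* reserved [0, 128K), but only [16K, 112K) of it was discarded */
	return_reserved(0, 128 << 10, 16 << 10, 96 << 10, 0, 1, print_range);
	return 0;
}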
3379
2bee7eb8
DZ
3380/*
3381 * If @async is set, then we will trim 1 region and return.
3382 */
32da5386 3383static int trim_no_bitmap(struct btrfs_block_group *block_group,
2bee7eb8
DZ
3384 u64 *total_trimmed, u64 start, u64 end, u64 minlen,
3385 bool async)
7fe1e641 3386{
19b2a2c7
DZ
3387 struct btrfs_discard_ctl *discard_ctl =
3388 &block_group->fs_info->discard_ctl;
7fe1e641
LZ
3389 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3390 struct btrfs_free_space *entry;
3391 struct rb_node *node;
3392 int ret = 0;
3393 u64 extent_start;
3394 u64 extent_bytes;
b0643e59 3395 enum btrfs_trim_state extent_trim_state;
7fe1e641 3396 u64 bytes;
19b2a2c7 3397 const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
f7039b1d
LD
3398
3399 while (start < end) {
55507ce3
FM
3400 struct btrfs_trim_range trim_entry;
3401
3402 mutex_lock(&ctl->cache_writeout_mutex);
34d52cb6 3403 spin_lock(&ctl->tree_lock);
f7039b1d 3404
2bee7eb8
DZ
3405 if (ctl->free_space < minlen)
3406 goto out_unlock;
f7039b1d 3407
34d52cb6 3408 entry = tree_search_offset(ctl, start, 0, 1);
2bee7eb8
DZ
3409 if (!entry)
3410 goto out_unlock;
f7039b1d 3411
2bee7eb8
DZ
3412 /* Skip bitmaps and, if async, already-trimmed entries */
3413 while (entry->bitmap ||
3414 (async && btrfs_free_space_trimmed(entry))) {
7fe1e641 3415 node = rb_next(&entry->offset_index);
2bee7eb8
DZ
3416 if (!node)
3417 goto out_unlock;
7fe1e641
LZ
3418 entry = rb_entry(node, struct btrfs_free_space,
3419 offset_index);
f7039b1d
LD
3420 }
3421
2bee7eb8
DZ
3422 if (entry->offset >= end)
3423 goto out_unlock;
f7039b1d 3424
7fe1e641
LZ
3425 extent_start = entry->offset;
3426 extent_bytes = entry->bytes;
b0643e59 3427 extent_trim_state = entry->trim_state;
4aa9ad52
DZ
3428 if (async) {
3429 start = entry->offset;
3430 bytes = entry->bytes;
3431 if (bytes < minlen) {
3432 spin_unlock(&ctl->tree_lock);
3433 mutex_unlock(&ctl->cache_writeout_mutex);
3434 goto next;
3435 }
3436 unlink_free_space(ctl, entry);
7fe6d45e
DZ
3437 /*
3438 * Let bytes = max_discard_size + X.
3439 * If X < BTRFS_ASYNC_DISCARD_MIN_FILTER, we won't trim
3440 * X when we come back around. So trim it now.
3441 */
3442 if (max_discard_size &&
3443 bytes >= (max_discard_size +
3444 BTRFS_ASYNC_DISCARD_MIN_FILTER)) {
19b2a2c7
DZ
3445 bytes = max_discard_size;
3446 extent_bytes = max_discard_size;
3447 entry->offset += max_discard_size;
3448 entry->bytes -= max_discard_size;
4aa9ad52
DZ
3449 link_free_space(ctl, entry);
3450 } else {
3451 kmem_cache_free(btrfs_free_space_cachep, entry);
3452 }
3453 } else {
3454 start = max(start, extent_start);
3455 bytes = min(extent_start + extent_bytes, end) - start;
3456 if (bytes < minlen) {
3457 spin_unlock(&ctl->tree_lock);
3458 mutex_unlock(&ctl->cache_writeout_mutex);
3459 goto next;
3460 }
f7039b1d 3461
4aa9ad52
DZ
3462 unlink_free_space(ctl, entry);
3463 kmem_cache_free(btrfs_free_space_cachep, entry);
3464 }
7fe1e641 3465
34d52cb6 3466 spin_unlock(&ctl->tree_lock);
55507ce3
FM
3467 trim_entry.start = extent_start;
3468 trim_entry.bytes = extent_bytes;
3469 list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
3470 mutex_unlock(&ctl->cache_writeout_mutex);
f7039b1d 3471
7fe1e641 3472 ret = do_trimming(block_group, total_trimmed, start, bytes,
b0643e59
DZ
3473 extent_start, extent_bytes, extent_trim_state,
3474 &trim_entry);
2bee7eb8
DZ
3475 if (ret) {
3476 block_group->discard_cursor = start + bytes;
7fe1e641 3477 break;
2bee7eb8 3478 }
7fe1e641
LZ
3479next:
3480 start += bytes;
2bee7eb8
DZ
3481 block_group->discard_cursor = start;
3482 if (async && *total_trimmed)
3483 break;
f7039b1d 3484
7fe1e641
LZ
3485 if (fatal_signal_pending(current)) {
3486 ret = -ERESTARTSYS;
3487 break;
3488 }
3489
3490 cond_resched();
3491 }
2bee7eb8
DZ
3492
3493 return ret;
3494
3495out_unlock:
3496 block_group->discard_cursor = btrfs_block_group_end(block_group);
3497 spin_unlock(&ctl->tree_lock);
3498 mutex_unlock(&ctl->cache_writeout_mutex);
3499
7fe1e641
LZ
3500 return ret;
3501}
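/*
 * Illustrative userspace sketch (not kernel code) of the async split in
 * trim_no_bitmap() above: an over-sized extent is trimmed max_discard_size
 * at a time, except that a tail smaller than the minimum filter is folded
 * into the last chunk so it is not left behind.  The two constants below
 * are assumptions for this sketch, not the kernel defaults.
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_DISCARD_SIZE	 (64ULL << 10)	/* assumed, tunable in btrfs */
#define ASYNC_DISCARD_MIN_FILTER (4ULL << 10)	/* assumed */

/* Returns how much to discard now; advances the extent past that chunk. */
static uint64_t clamp_async_trim(uint64_t *offset, uint64_t *bytes)
{
	uint64_t now = *bytes;

	if (MAX_DISCARD_SIZE &&
	    *bytes >= MAX_DISCARD_SIZE + ASYNC_DISCARD_MIN_FILTER) {
		/* Trim only the first chunk, keep the rest for a later pass. */
		now = MAX_DISCARD_SIZE;
		*offset += MAX_DISCARD_SIZE;
		*bytes -= MAX_DISCARD_SIZE;
	} else {
		/* Small enough (or the tail would be too small): take it all. */
		*bytes = 0;
	}
	return now;
}

int main(void)
{
	uint64_t offset = 1ULL << 20, bytes = 200ULL << 10;

	while (bytes) {
		uint64_t at = offset;
		uint64_t n = clamp_async_trim(&offset, &bytes);

		printf("discard %llu at %llu, %llu left\n",
		       (unsigned long long)n, (unsigned long long)at,
		       (unsigned long long)bytes);
	}
	return 0;
}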
3502
da080fe1
DZ
3503/*
3504 * If we break out of trimming a bitmap prematurely, we should reset the
3505 * trimming bit. In a rather contrived case, it's possible to race here so
3506 * reset the state to BTRFS_TRIM_STATE_UNTRIMMED.
3507 *
3508 * start = start of bitmap
3509 * end = near end of bitmap
3510 *
3511 * Thread 1: Thread 2:
3512 * trim_bitmaps(start)
3513 * trim_bitmaps(end)
3514 * end_trimming_bitmap()
3515 * reset_trimming_bitmap()
3516 */
3517static void reset_trimming_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset)
3518{
3519 struct btrfs_free_space *entry;
3520
3521 spin_lock(&ctl->tree_lock);
3522 entry = tree_search_offset(ctl, offset, 1, 0);
dfb79ddb 3523 if (entry) {
5dc7c10b 3524 if (btrfs_free_space_trimmed(entry)) {
dfb79ddb
DZ
3525 ctl->discardable_extents[BTRFS_STAT_CURR] +=
3526 entry->bitmap_extents;
5dc7c10b
DZ
3527 ctl->discardable_bytes[BTRFS_STAT_CURR] += entry->bytes;
3528 }
da080fe1 3529 entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
dfb79ddb
DZ
3530 }
3531
da080fe1
DZ
3532 spin_unlock(&ctl->tree_lock);
3533}
3534
dfb79ddb
DZ
3535static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl,
3536 struct btrfs_free_space *entry)
da080fe1 3537{
dfb79ddb 3538 if (btrfs_free_space_trimming_bitmap(entry)) {
da080fe1 3539 entry->trim_state = BTRFS_TRIM_STATE_TRIMMED;
dfb79ddb
DZ
3540 ctl->discardable_extents[BTRFS_STAT_CURR] -=
3541 entry->bitmap_extents;
5dc7c10b 3542 ctl->discardable_bytes[BTRFS_STAT_CURR] -= entry->bytes;
dfb79ddb 3543 }
da080fe1
DZ
3544}
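/*
 * Illustrative sketch of the discardable-stat accounting in
 * reset_trimming_bitmap() and end_trimming_bitmap() above: a bitmap that
 * loses its trimmed state puts its extents/bytes back into the discardable
 * counters, and a bitmap that finishes trimming takes them out.  The struct
 * and field names are simplified stand-ins for
 * ctl->discardable_extents/bytes[BTRFS_STAT_CURR] and the entry fields.
 */
#include <stdint.h>
#include <stdio.h>

enum trim_state { UNTRIMMED, TRIMMED, TRIMMING };	/* mirrors btrfs_trim_state */

struct stats { int64_t extents; int64_t bytes; };	/* stand-in counters */
struct bmap { enum trim_state state; int64_t extents; int64_t bytes; };

static void reset_trimming(struct stats *s, struct bmap *b)
{
	if (b->state == TRIMMED) {	/* becomes discardable again */
		s->extents += b->extents;
		s->bytes += b->bytes;
	}
	b->state = UNTRIMMED;
}

static void end_trimming(struct stats *s, struct bmap *b)
{
	if (b->state == TRIMMING) {	/* nobody reset us mid-way */
		b->state = TRIMMED;
		s->extents -= b->extents;
		s->bytes -= b->bytes;
	}
}

int main(void)
{
	struct stats s = { 10, 1 << 20 };		/* counters before */
	struct bmap b = { TRIMMING, 3, 64 << 10 };	/* a bitmap mid-trim */

	end_trimming(&s, &b);		/* finished: no longer discardable */
	printf("%lld extents / %lld bytes discardable\n",
	       (long long)s.extents, (long long)s.bytes);
	reset_trimming(&s, &b);		/* aborted later: discardable again */
	printf("%lld extents / %lld bytes discardable\n",
	       (long long)s.extents, (long long)s.bytes);
	return 0;
}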
3545
2bee7eb8
DZ
3546/*
3547 * If @async is set, then we will trim 1 region and return.
3548 */
32da5386 3549static int trim_bitmaps(struct btrfs_block_group *block_group,
2bee7eb8 3550 u64 *total_trimmed, u64 start, u64 end, u64 minlen,
7fe6d45e 3551 u64 maxlen, bool async)
7fe1e641 3552{
19b2a2c7
DZ
3553 struct btrfs_discard_ctl *discard_ctl =
3554 &block_group->fs_info->discard_ctl;
7fe1e641
LZ
3555 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3556 struct btrfs_free_space *entry;
3557 int ret = 0;
3558 int ret2;
3559 u64 bytes;
3560 u64 offset = offset_to_bitmap(ctl, start);
19b2a2c7 3561 const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
7fe1e641
LZ
3562
3563 while (offset < end) {
3564 bool next_bitmap = false;
55507ce3 3565 struct btrfs_trim_range trim_entry;
7fe1e641 3566
55507ce3 3567 mutex_lock(&ctl->cache_writeout_mutex);
7fe1e641
LZ
3568 spin_lock(&ctl->tree_lock);
3569
3570 if (ctl->free_space < minlen) {
2bee7eb8
DZ
3571 block_group->discard_cursor =
3572 btrfs_block_group_end(block_group);
7fe1e641 3573 spin_unlock(&ctl->tree_lock);
55507ce3 3574 mutex_unlock(&ctl->cache_writeout_mutex);
7fe1e641
LZ
3575 break;
3576 }
3577
3578 entry = tree_search_offset(ctl, offset, 1, 0);
7fe6d45e
DZ
3579 /*
3580 * Bitmaps are marked trimmed lossily now to prevent constant
3581 * discarding of the same bitmap (the reason why we are bound
3582 * by the filters). So, retrim the block group bitmaps when we
3583 * are preparing to punt to the unused_bgs list. This uses
3584 * @minlen to determine if we are in BTRFS_DISCARD_INDEX_UNUSED
3585 * which is the only discard index which sets minlen to 0.
3586 */
3587 if (!entry || (async && minlen && start == offset &&
2bee7eb8 3588 btrfs_free_space_trimmed(entry))) {
7fe1e641 3589 spin_unlock(&ctl->tree_lock);
55507ce3 3590 mutex_unlock(&ctl->cache_writeout_mutex);
7fe1e641
LZ
3591 next_bitmap = true;
3592 goto next;
3593 }
3594
da080fe1
DZ
3595 /*
3596 * Async discard bitmap trimming begins by setting the start
3597 * to be key.objectid and the offset_to_bitmap() aligns to the
3598 * start of the bitmap. This lets us know we are fully
3599 * scanning the bitmap rather than only some portion of it.
3600 */
3601 if (start == offset)
3602 entry->trim_state = BTRFS_TRIM_STATE_TRIMMING;
3603
7fe1e641 3604 bytes = minlen;
0584f718 3605 ret2 = search_bitmap(ctl, entry, &start, &bytes, false);
7fe1e641 3606 if (ret2 || start >= end) {
da080fe1 3607 /*
7fe6d45e
DZ
3608 * We lossily consider a bitmap trimmed if we only skip
3609 * over regions <= BTRFS_ASYNC_DISCARD_MIN_FILTER.
da080fe1 3610 */
7fe6d45e 3611 if (ret2 && minlen <= BTRFS_ASYNC_DISCARD_MIN_FILTER)
dfb79ddb 3612 end_trimming_bitmap(ctl, entry);
da080fe1
DZ
3613 else
3614 entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
7fe1e641 3615 spin_unlock(&ctl->tree_lock);
55507ce3 3616 mutex_unlock(&ctl->cache_writeout_mutex);
7fe1e641
LZ
3617 next_bitmap = true;
3618 goto next;
3619 }
3620
2bee7eb8
DZ
3621 /*
3622 * We already trimmed a region, but are using the locking above
3623 * to reset the trim_state.
3624 */
3625 if (async && *total_trimmed) {
3626 spin_unlock(&ctl->tree_lock);
3627 mutex_unlock(&ctl->cache_writeout_mutex);
3628 goto out;
3629 }
3630
7fe1e641 3631 bytes = min(bytes, end - start);
7fe6d45e 3632 if (bytes < minlen || (async && maxlen && bytes > maxlen)) {
7fe1e641 3633 spin_unlock(&ctl->tree_lock);
55507ce3 3634 mutex_unlock(&ctl->cache_writeout_mutex);
7fe1e641
LZ
3635 goto next;
3636 }
3637
7fe6d45e
DZ
3638 /*
3639 * Let bytes = max_discard_size + X.
3640 * If X < @minlen, we won't trim X when we come back around.
3641 * So trim it now. We differ here from trimming extents as we
3642 * don't keep individual state per bit.
3643 */
3644 if (async &&
3645 max_discard_size &&
3646 bytes > (max_discard_size + minlen))
19b2a2c7 3647 bytes = max_discard_size;
4aa9ad52 3648
7fe1e641
LZ
3649 bitmap_clear_bits(ctl, entry, start, bytes);
3650 if (entry->bytes == 0)
3651 free_bitmap(ctl, entry);
3652
3653 spin_unlock(&ctl->tree_lock);
55507ce3
FM
3654 trim_entry.start = start;
3655 trim_entry.bytes = bytes;
3656 list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
3657 mutex_unlock(&ctl->cache_writeout_mutex);
7fe1e641
LZ
3658
3659 ret = do_trimming(block_group, total_trimmed, start, bytes,
b0643e59 3660 start, bytes, 0, &trim_entry);
da080fe1
DZ
3661 if (ret) {
3662 reset_trimming_bitmap(ctl, offset);
2bee7eb8
DZ
3663 block_group->discard_cursor =
3664 btrfs_block_group_end(block_group);
7fe1e641 3665 break;
da080fe1 3666 }
7fe1e641
LZ
3667next:
3668 if (next_bitmap) {
3669 offset += BITS_PER_BITMAP * ctl->unit;
da080fe1 3670 start = offset;
7fe1e641
LZ
3671 } else {
3672 start += bytes;
f7039b1d 3673 }
2bee7eb8 3674 block_group->discard_cursor = start;
f7039b1d
LD
3675
3676 if (fatal_signal_pending(current)) {
da080fe1
DZ
3677 if (start != offset)
3678 reset_trimming_bitmap(ctl, offset);
f7039b1d
LD
3679 ret = -ERESTARTSYS;
3680 break;
3681 }
3682
3683 cond_resched();
3684 }
3685
2bee7eb8
DZ
3686 if (offset >= end)
3687 block_group->discard_cursor = end;
3688
3689out:
f7039b1d
LD
3690 return ret;
3691}
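/*
 * Illustrative userspace sketch (not kernel code) of how the loop above walks
 * bitmap windows.  With 4K pages and a 4K ctl->unit each bitmap covers
 * PAGE_SIZE * 8 * unit = 128MiB of the block group, and offset_to_bitmap()
 * rounds an offset down to the start of its window relative to the block
 * group start; bitmap_window_start() below mirrors that.  All values are
 * assumed for the example.
 */
#include <stdint.h>
#include <stdio.h>

#define UNIT		4096ULL			/* assumed sectorsize */
#define BITS_PER_BMP	(4096ULL * 8)		/* assumed PAGE_SIZE * 8 */
#define BYTES_PER_BMP	(BITS_PER_BMP * UNIT)	/* 128MiB per bitmap */

static uint64_t bitmap_window_start(uint64_t bg_start, uint64_t offset)
{
	return bg_start + ((offset - bg_start) / BYTES_PER_BMP) * BYTES_PER_BMP;
}

int main(void)
{
	uint64_t bg_start = 1ULL << 30;			/* hypothetical block group */
	uint64_t start = bg_start + (200ULL << 20);	/* trim range start */
	uint64_t end = bg_start + (500ULL << 20);	/* trim range end */
	uint64_t offset = bitmap_window_start(bg_start, start);

	while (offset < end) {
		printf("scan bitmap window at +%lluMiB\n",
		       (unsigned long long)((offset - bg_start) >> 20));
		offset += BYTES_PER_BMP;	/* BITS_PER_BITMAP * ctl->unit */
	}
	return 0;
}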
581bb050 3692
32da5386 3693int btrfs_trim_block_group(struct btrfs_block_group *block_group,
e33e17ee
JM
3694 u64 *trimmed, u64 start, u64 end, u64 minlen)
3695{
da080fe1 3696 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
e33e17ee 3697 int ret;
da080fe1 3698 u64 rem = 0;
e33e17ee
JM
3699
3700 *trimmed = 0;
3701
3702 spin_lock(&block_group->lock);
3703 if (block_group->removed) {
04216820 3704 spin_unlock(&block_group->lock);
e33e17ee 3705 return 0;
04216820 3706 }
6b7304af 3707 btrfs_freeze_block_group(block_group);
e33e17ee
JM
3708 spin_unlock(&block_group->lock);
3709
2bee7eb8 3710 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false);
e33e17ee
JM
3711 if (ret)
3712 goto out;
7fe1e641 3713
7fe6d45e 3714 ret = trim_bitmaps(block_group, trimmed, start, end, minlen, 0, false);
da080fe1
DZ
3715 div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem);
3716 /* If we ended in the middle of a bitmap, reset the trimming flag */
3717 if (rem)
3718 reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end));
e33e17ee 3719out:
6b7304af 3720 btrfs_unfreeze_block_group(block_group);
7fe1e641
LZ
3721 return ret;
3722}
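/*
 * Illustrative sketch of the mid-bitmap check above: if the requested trim
 * range ends inside a bitmap window rather than on a boundary, the remainder
 * is non-zero and the covering bitmap's trim state has to be reset.  The
 * 128MiB window again assumes 4K pages and a 4K ctl->unit.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t bytes_per_bitmap = 4096ULL * 8 * 4096;	/* 128MiB */
	const uint64_t end = 300ULL << 20;	/* trim ended at 300MiB */
	uint64_t rem = end % bytes_per_bitmap;	/* what div64_u64_rem() computes */

	if (rem)
		printf("ended %lluMiB into a bitmap: reset its trim state\n",
		       (unsigned long long)(rem >> 20));
	else
		printf("ended on a bitmap boundary: nothing to reset\n");
	return 0;
}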
3723
2bee7eb8
DZ
3724int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
3725 u64 *trimmed, u64 start, u64 end, u64 minlen,
3726 bool async)
3727{
3728 int ret;
3729
3730 *trimmed = 0;
3731
3732 spin_lock(&block_group->lock);
3733 if (block_group->removed) {
3734 spin_unlock(&block_group->lock);
3735 return 0;
3736 }
6b7304af 3737 btrfs_freeze_block_group(block_group);
2bee7eb8
DZ
3738 spin_unlock(&block_group->lock);
3739
3740 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async);
6b7304af 3741 btrfs_unfreeze_block_group(block_group);
2bee7eb8
DZ
3742
3743 return ret;
3744}
3745
3746int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
3747 u64 *trimmed, u64 start, u64 end, u64 minlen,
7fe6d45e 3748 u64 maxlen, bool async)
2bee7eb8
DZ
3749{
3750 int ret;
3751
3752 *trimmed = 0;
3753
3754 spin_lock(&block_group->lock);
3755 if (block_group->removed) {
3756 spin_unlock(&block_group->lock);
3757 return 0;
3758 }
6b7304af 3759 btrfs_freeze_block_group(block_group);
2bee7eb8
DZ
3760 spin_unlock(&block_group->lock);
3761
7fe6d45e
DZ
3762 ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen,
3763 async);
3764
6b7304af 3765 btrfs_unfreeze_block_group(block_group);
2bee7eb8
DZ
3766
3767 return ret;
3768}
3769
74255aa0 3770#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
dc11dd5d
JB
3771/*
3772 * Use this if you need to make a bitmap or extent entry specifically. It
3773 * doesn't do any of the merging that add_free_space does; it acts a lot like
3774 * how the free space cache loading code works, so you can get really weird
3775 * configurations.
3776 */
32da5386 3777int test_add_free_space_entry(struct btrfs_block_group *cache,
dc11dd5d 3778 u64 offset, u64 bytes, bool bitmap)
74255aa0 3779{
dc11dd5d
JB
3780 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
3781 struct btrfs_free_space *info = NULL, *bitmap_info;
3782 void *map = NULL;
da080fe1 3783 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_TRIMMED;
dc11dd5d
JB
3784 u64 bytes_added;
3785 int ret;
74255aa0 3786
dc11dd5d
JB
3787again:
3788 if (!info) {
3789 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
3790 if (!info)
3791 return -ENOMEM;
74255aa0
JB
3792 }
3793
dc11dd5d
JB
3794 if (!bitmap) {
3795 spin_lock(&ctl->tree_lock);
3796 info->offset = offset;
3797 info->bytes = bytes;
cef40483 3798 info->max_extent_size = 0;
dc11dd5d
JB
3799 ret = link_free_space(ctl, info);
3800 spin_unlock(&ctl->tree_lock);
3801 if (ret)
3802 kmem_cache_free(btrfs_free_space_cachep, info);
3803 return ret;
3804 }
3805
3806 if (!map) {
3acd4850 3807 map = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, GFP_NOFS);
dc11dd5d
JB
3808 if (!map) {
3809 kmem_cache_free(btrfs_free_space_cachep, info);
3810 return -ENOMEM;
3811 }
3812 }
3813
3814 spin_lock(&ctl->tree_lock);
3815 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
3816 1, 0);
3817 if (!bitmap_info) {
3818 info->bitmap = map;
3819 map = NULL;
3820 add_new_bitmap(ctl, info, offset);
3821 bitmap_info = info;
20005523 3822 info = NULL;
dc11dd5d 3823 }
74255aa0 3824
da080fe1
DZ
3825 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
3826 trim_state);
cef40483 3827
dc11dd5d
JB
3828 bytes -= bytes_added;
3829 offset += bytes_added;
3830 spin_unlock(&ctl->tree_lock);
74255aa0 3831
dc11dd5d
JB
3832 if (bytes)
3833 goto again;
74255aa0 3834
20005523
FM
3835 if (info)
3836 kmem_cache_free(btrfs_free_space_cachep, info);
3acd4850
CL
3837 if (map)
3838 kmem_cache_free(btrfs_free_space_bitmap_cachep, map);
dc11dd5d 3839 return 0;
74255aa0
JB
3840}
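/*
 * Illustrative userspace sketch (not kernel code) of the "again" loop in
 * test_add_free_space_entry() above: a request that crosses a bitmap
 * boundary is added one window at a time, each pass covering only what fits
 * in the current bitmap.  The 128MiB window is the same assumed geometry as
 * before, and the block group is assumed to start at offset 0 to keep the
 * arithmetic simple.
 */
#include <stdint.h>
#include <stdio.h>

#define BYTES_PER_BMP	(4096ULL * 8 * 4096)	/* assumed 128MiB window */

/* Mirrors how much add_bytes_to_bitmap() can take in one call. */
static uint64_t add_to_one_bitmap(uint64_t offset, uint64_t bytes)
{
	uint64_t window_end = (offset / BYTES_PER_BMP + 1) * BYTES_PER_BMP;

	if (offset + bytes > window_end)
		bytes = window_end - offset;	/* clamp to this bitmap */
	return bytes;
}

int main(void)
{
	uint64_t offset = 120ULL << 20;	/* 8MiB before a window boundary */
	uint64_t bytes = 20ULL << 20;

	while (bytes) {
		uint64_t added = add_to_one_bitmap(offset, bytes);

		printf("added %lluMiB at +%lluMiB\n",
		       (unsigned long long)(added >> 20),
		       (unsigned long long)(offset >> 20));
		bytes -= added;
		offset += added;
	}
	return 0;
}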
3841
3842/*
3843 * Checks to see if the given range is in the free space cache. This is really
3844 * just used to check the absence of space, so if there is free space in the
3845 * range at all we will return 1.
3846 */
32da5386 3847int test_check_exists(struct btrfs_block_group *cache,
dc11dd5d 3848 u64 offset, u64 bytes)
74255aa0
JB
3849{
3850 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
3851 struct btrfs_free_space *info;
3852 int ret = 0;
3853
3854 spin_lock(&ctl->tree_lock);
3855 info = tree_search_offset(ctl, offset, 0, 0);
3856 if (!info) {
3857 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
3858 1, 0);
3859 if (!info)
3860 goto out;
3861 }
3862
3863have_info:
3864 if (info->bitmap) {
3865 u64 bit_off, bit_bytes;
3866 struct rb_node *n;
3867 struct btrfs_free_space *tmp;
3868
3869 bit_off = offset;
3870 bit_bytes = ctl->unit;
0584f718 3871 ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false);
74255aa0
JB
3872 if (!ret) {
3873 if (bit_off == offset) {
3874 ret = 1;
3875 goto out;
3876 } else if (bit_off > offset &&
3877 offset + bytes > bit_off) {
3878 ret = 1;
3879 goto out;
3880 }
3881 }
3882
3883 n = rb_prev(&info->offset_index);
3884 while (n) {
3885 tmp = rb_entry(n, struct btrfs_free_space,
3886 offset_index);
3887 if (tmp->offset + tmp->bytes < offset)
3888 break;
3889 if (offset + bytes < tmp->offset) {
5473e0c4 3890 n = rb_prev(&tmp->offset_index);
74255aa0
JB
3891 continue;
3892 }
3893 info = tmp;
3894 goto have_info;
3895 }
3896
3897 n = rb_next(&info->offset_index);
3898 while (n) {
3899 tmp = rb_entry(n, struct btrfs_free_space,
3900 offset_index);
3901 if (offset + bytes < tmp->offset)
3902 break;
3903 if (tmp->offset + tmp->bytes < offset) {
5473e0c4 3904 n = rb_next(&tmp->offset_index);
74255aa0
JB
3905 continue;
3906 }
3907 info = tmp;
3908 goto have_info;
3909 }
3910
20005523 3911 ret = 0;
74255aa0
JB
3912 goto out;
3913 }
3914
3915 if (info->offset == offset) {
3916 ret = 1;
3917 goto out;
3918 }
3919
3920 if (offset > info->offset && offset < info->offset + info->bytes)
3921 ret = 1;
3922out:
3923 spin_unlock(&ctl->tree_lock);
3924 return ret;
3925}
dc11dd5d 3926#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */