#include <linux/rcupdate.h>
#include <linux/kthread.h>
#include <linux/slab.h>
+#include <linux/ratelimit.h>
#include "compat.h"
#include "hash.h"
#include "ctree.h"
{
int ret;
u64 discarded_bytes = 0;
- struct btrfs_multi_bio *multi = NULL;
+ struct btrfs_bio *bbio = NULL;
/* Tell the block device(s) that the sectors can be discarded */
ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD,
- bytenr, &num_bytes, &multi, 0);
+ bytenr, &num_bytes, &bbio, 0);
if (!ret) {
- struct btrfs_bio_stripe *stripe = multi->stripes;
+ struct btrfs_bio_stripe *stripe = bbio->stripes;
int i;
- for (i = 0; i < multi->num_stripes; i++, stripe++) {
+ for (i = 0; i < bbio->num_stripes; i++, stripe++) {
if (!stripe->dev->can_discard)
continue;
*/
ret = 0;
}
- kfree(multi);
+ kfree(bbio);
}
if (actual_bytes)
* about 1% of the FS size.
*/
if (force == CHUNK_ALLOC_LIMITED) {
- thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+ thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
thresh = max_t(u64, 64 * 1024 * 1024,
div_factor_fine(thresh, 1));
if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
return 0;
- thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+ thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
/* 256MB or 5% of the FS */
thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
/*
* shrink metadata reservation for delalloc
*/
-static int shrink_delalloc(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 to_reclaim,
+static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim,
bool wait_ordered)
{
struct btrfs_block_rsv *block_rsv;
struct btrfs_space_info *space_info;
+ struct btrfs_trans_handle *trans;
u64 reserved;
u64 max_reclaim;
u64 reclaimed = 0;
int loops = 0;
unsigned long progress;
+ trans = (struct btrfs_trans_handle *)current->journal_info;
block_rsv = &root->fs_info->delalloc_block_rsv;
space_info = block_rsv->space_info;
smp_mb();
nr_pages = min_t(unsigned long, nr_pages,
root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
- writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
+ writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages,
+ WB_REASON_FS_FREE_SPACE);
spin_lock(&space_info->lock);
if (reserved > space_info->bytes_may_use)
return reclaimed >= to_reclaim;
}
+/**
+ * maybe_commit_transaction - possibly commit the transaction if its ok to
+ * @root - the root we're allocating for
+ * @bytes - the number of bytes we want to reserve
+ * @force - force the commit
+ *
+ * This will check to make sure that committing the transaction will actually
+ * get us somewhere and then commit the transaction if it does. Otherwise it
+ * will return -ENOSPC.
+ */
+static int may_commit_transaction(struct btrfs_root *root,
+ struct btrfs_space_info *space_info,
+ u64 bytes, int force)
+{
+ struct btrfs_block_rsv *delayed_rsv = &root->fs_info->delayed_block_rsv;
+ struct btrfs_trans_handle *trans;
+
+ trans = (struct btrfs_trans_handle *)current->journal_info;
+ if (trans)
+ return -EAGAIN;
+
+ if (force)
+ goto commit;
+
+ /* See if there is enough pinned space to make this reservation */
+ spin_lock(&space_info->lock);
+ if (space_info->bytes_pinned >= bytes) {
+ spin_unlock(&space_info->lock);
+ goto commit;
+ }
+ spin_unlock(&space_info->lock);
+
+ /*
+ * See if there is some space in the delayed insertion reservation for
+ * this reservation.
+ */
+ if (space_info != delayed_rsv->space_info)
+ return -ENOSPC;
+
+ spin_lock(&delayed_rsv->lock);
+ if (delayed_rsv->size < bytes) {
+ spin_unlock(&delayed_rsv->lock);
+ return -ENOSPC;
+ }
+ spin_unlock(&delayed_rsv->lock);
+
+commit:
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans))
+ return -ENOSPC;
+
+ return btrfs_commit_transaction(trans, root);
+}
+
/**
* reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
* @root - the root we're allocating for
u64 orig_bytes, int flush)
{
struct btrfs_space_info *space_info = block_rsv->space_info;
- struct btrfs_trans_handle *trans;
u64 used;
u64 num_bytes = orig_bytes;
int retries = 0;
bool flushing = false;
bool wait_ordered = false;
- trans = (struct btrfs_trans_handle *)current->journal_info;
again:
ret = 0;
spin_lock(&space_info->lock);
* deadlock since we are waiting for the flusher to finish, but
* hold the current transaction open.
*/
- if (trans)
+ if (current->journal_info)
return -EAGAIN;
ret = wait_event_interruptible(space_info->wait,
!space_info->flush);
*/
avail = (space_info->total_bytes - space_info->bytes_used) * 8;
do_div(avail, 10);
- if (space_info->bytes_pinned >= avail && flush && !trans &&
- !committed) {
+ if (space_info->bytes_pinned >= avail && flush && !committed) {
space_info->flush = 1;
flushing = true;
spin_unlock(&space_info->lock);
- goto commit;
+ ret = may_commit_transaction(root, space_info,
+ orig_bytes, 1);
+ if (ret)
+ goto out;
+ committed = true;
+ goto again;
}
spin_lock(&root->fs_info->free_chunk_lock);
* We do synchronous shrinking since we don't actually unreserve
* metadata until after the IO is completed.
*/
- ret = shrink_delalloc(trans, root, num_bytes, wait_ordered);
+ ret = shrink_delalloc(root, num_bytes, wait_ordered);
if (ret < 0)
goto out;
goto again;
}
- ret = -EAGAIN;
- if (trans)
- goto out;
-
-commit:
ret = -ENOSPC;
if (committed)
goto out;
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- goto out;
- ret = btrfs_commit_transaction(trans, root);
+ ret = may_commit_transaction(root, space_info, orig_bytes, 0);
if (!ret) {
- trans = NULL;
committed = true;
goto again;
}
kfree(rsv);
}
-int btrfs_block_rsv_add(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 num_bytes)
+static inline int __block_rsv_add(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes, int flush)
{
int ret;
if (num_bytes == 0)
return 0;
- ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1);
+ ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
if (!ret) {
block_rsv_add_bytes(block_rsv, num_bytes, 1);
return 0;
return ret;
}
+int btrfs_block_rsv_add(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes)
+{
+ return __block_rsv_add(root, block_rsv, num_bytes, 1);
+}
+
+int btrfs_block_rsv_add_noflush(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes)
+{
+ return __block_rsv_add(root, block_rsv, num_bytes, 0);
+}
+
int btrfs_block_rsv_check(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, int min_factor)
{
u64 num_bytes;
u64 meta_used;
u64 data_used;
- int csum_size = btrfs_super_csum_size(&fs_info->super_copy);
+ int csum_size = btrfs_super_csum_size(fs_info->super_copy);
sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
spin_lock(&sinfo->lock);
fs_info->delalloc_block_rsv.space_info = space_info;
fs_info->trans_block_rsv.space_info = space_info;
fs_info->empty_block_rsv.space_info = space_info;
+ fs_info->delayed_block_rsv.space_info = space_info;
fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
WARN_ON(fs_info->trans_block_rsv.reserved > 0);
WARN_ON(fs_info->chunk_block_rsv.size > 0);
WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
+ WARN_ON(fs_info->delayed_block_rsv.size > 0);
+ WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
}
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
*/
static unsigned drop_outstanding_extent(struct inode *inode)
{
+ unsigned drop_inode_space = 0;
unsigned dropped_extents = 0;
BUG_ON(!BTRFS_I(inode)->outstanding_extents);
BTRFS_I(inode)->outstanding_extents--;
+ if (BTRFS_I(inode)->outstanding_extents == 0 &&
+ BTRFS_I(inode)->delalloc_meta_reserved) {
+ drop_inode_space = 1;
+ BTRFS_I(inode)->delalloc_meta_reserved = 0;
+ }
+
/*
* If we have more or the same amount of outsanding extents than we have
* reserved then we need to leave the reserved extents count alone.
*/
if (BTRFS_I(inode)->outstanding_extents >=
BTRFS_I(inode)->reserved_extents)
- return 0;
+ return drop_inode_space;
dropped_extents = BTRFS_I(inode)->reserved_extents -
BTRFS_I(inode)->outstanding_extents;
BTRFS_I(inode)->reserved_extents -= dropped_extents;
- return dropped_extents;
+ return dropped_extents + drop_inode_space;
}
/**
nr_extents = BTRFS_I(inode)->outstanding_extents -
BTRFS_I(inode)->reserved_extents;
BTRFS_I(inode)->reserved_extents += nr_extents;
+ }
- to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
+ /*
+ * Add an item to reserve for updating the inode when we complete the
+ * delalloc io.
+ */
+ if (!BTRFS_I(inode)->delalloc_meta_reserved) {
+ nr_extents++;
+ BTRFS_I(inode)->delalloc_meta_reserved = 1;
}
+
+ to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
spin_unlock(&BTRFS_I(inode)->lock);
/* block accounting for super block */
spin_lock(&info->delalloc_lock);
- old_val = btrfs_super_bytes_used(&info->super_copy);
+ old_val = btrfs_super_bytes_used(info->super_copy);
if (alloc)
old_val += num_bytes;
else
old_val -= num_bytes;
- btrfs_set_super_bytes_used(&info->super_copy, old_val);
+ btrfs_set_super_bytes_used(info->super_copy, old_val);
spin_unlock(&info->delalloc_lock);
while (total) {
return 0;
}
+/*
+ * this function must be called within transaction
+ */
+int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes)
+{
+ struct btrfs_block_group_cache *cache;
+
+ cache = btrfs_lookup_block_group(root->fs_info, bytenr);
+ BUG_ON(!cache);
+
+ /*
+ * pull in the free space cache (if any) so that our pin
+ * removes the free space from the cache. We have load_only set
+ * to one because the slow code to read in the free extents does check
+ * the pinned extents.
+ */
+ cache_block_group(cache, trans, root, 1);
+
+ pin_down_extent(root, cache, bytenr, num_bytes, 0);
+
+ /* remove us from the free space cache (if we're there at all) */
+ btrfs_remove_free_space(cache, bytenr, num_bytes);
+ btrfs_put_block_group(cache);
+ return 0;
+}
+
/**
* btrfs_update_reserved_bytes - update the block_group and space info counters
* @cache: The cache we are manipulating
return ret;
}
-int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
+static int __btrfs_free_reserved_extent(struct btrfs_root *root,
+ u64 start, u64 len, int pin)
{
struct btrfs_block_group_cache *cache;
int ret = 0;
if (btrfs_test_opt(root, DISCARD))
ret = btrfs_discard_extent(root, start, len, NULL);
- btrfs_add_free_space(cache, start, len);
- btrfs_update_reserved_bytes(cache, len, RESERVE_FREE);
+ if (pin)
+ pin_down_extent(root, cache, start, len, 1);
+ else {
+ btrfs_add_free_space(cache, start, len);
+ btrfs_update_reserved_bytes(cache, len, RESERVE_FREE);
+ }
btrfs_put_block_group(cache);
trace_btrfs_reserved_extent_free(root, start, len);
return ret;
}
+int btrfs_free_reserved_extent(struct btrfs_root *root,
+ u64 start, u64 len)
+{
+ return __btrfs_free_reserved_extent(root, start, len, 0);
+}
+
+int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
+ u64 start, u64 len)
+{
+ return __btrfs_free_reserved_extent(root, start, len, 1);
+}
+
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 parent, u64 root_objectid,
if (!ret)
return block_rsv;
if (ret) {
- WARN_ON(1);
+ static DEFINE_RATELIMIT_STATE(_rs,
+ DEFAULT_RATELIMIT_INTERVAL,
+ /*DEFAULT_RATELIMIT_BURST*/ 2);
+ if (__ratelimit(&_rs)) {
+ printk(KERN_DEBUG "btrfs: block rsv returned %d\n", ret);
+ WARN_ON(1);
+ }
ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
if (!ret) {
return block_rsv;
return -ENOMEM;
path->reada = 1;
- cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
+ cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
if (btrfs_test_opt(root, SPACE_CACHE) &&
- btrfs_super_generation(&root->fs_info->super_copy) != cache_gen)
+ btrfs_super_generation(root->fs_info->super_copy) != cache_gen)
need_clear = 1;
if (btrfs_test_opt(root, CLEAR_CACHE))
need_clear = 1;
int mixed = 0;
int ret;
- disk_super = &fs_info->super_copy;
+ disk_super = fs_info->super_copy;
if (!btrfs_super_root(disk_super))
return 1;