]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blobdiff - fs/btrfs/inode.c
Btrfs: use percpu counter for fs_info->delalloc_bytes
[mirror_ubuntu-artful-kernel.git] / fs / btrfs / inode.c
index 95542a1b3dfc99632219310f0108788789247fc9..24c0b7805fe190259d3838faa7c47f0b85642e7f 100644 (file)
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
 #include <linux/mount.h>
+#include <linux/btrfs.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
 #include "btrfs_inode.h"
-#include "ioctl.h"
 #include "print-tree.h"
 #include "ordered-data.h"
 #include "xattr.h"
@@ -71,6 +71,7 @@ static const struct file_operations btrfs_dir_file_operations;
 static struct extent_io_ops btrfs_extent_io_ops;
 
 static struct kmem_cache *btrfs_inode_cachep;
+static struct kmem_cache *btrfs_delalloc_work_cachep;
 struct kmem_cache *btrfs_trans_handle_cachep;
 struct kmem_cache *btrfs_transaction_cachep;
 struct kmem_cache *btrfs_path_cachep;
@@ -87,13 +88,17 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
        [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
 };
 
-static int btrfs_setsize(struct inode *inode, loff_t newsize);
+static int btrfs_setsize(struct inode *inode, struct iattr *attr);
 static int btrfs_truncate(struct inode *inode);
 static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
 static noinline int cow_file_range(struct inode *inode,
                                   struct page *locked_page,
                                   u64 start, u64 end, int *page_started,
                                   unsigned long *nr_written, int unlock);
+static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
+                                          u64 len, u64 orig_start,
+                                          u64 block_start, u64 block_len,
+                                          u64 orig_block_len, int type);
 
 static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
                                     struct inode *inode,  struct inode *dir,
@@ -695,17 +700,24 @@ retry:
                em->start = async_extent->start;
                em->len = async_extent->ram_size;
                em->orig_start = em->start;
+               em->mod_start = em->start;
+               em->mod_len = em->len;
 
                em->block_start = ins.objectid;
                em->block_len = ins.offset;
+               em->orig_block_len = ins.offset;
                em->bdev = root->fs_info->fs_devices->latest_bdev;
                em->compress_type = async_extent->compress_type;
                set_bit(EXTENT_FLAG_PINNED, &em->flags);
                set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+               em->generation = -1;
 
                while (1) {
                        write_lock(&em_tree->lock);
                        ret = add_extent_mapping(em_tree, em);
+                       if (!ret)
+                               list_move(&em->list,
+                                         &em_tree->modified_extents);
                        write_unlock(&em_tree->lock);
                        if (ret != -EEXIST) {
                                free_extent_map(em);
@@ -803,14 +815,14 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
  * required to start IO on it.  It may be clean and already done with
  * IO when we return.
  */
-static noinline int cow_file_range(struct inode *inode,
-                                  struct page *locked_page,
-                                  u64 start, u64 end, int *page_started,
-                                  unsigned long *nr_written,
-                                  int unlock)
+static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
+                                    struct inode *inode,
+                                    struct btrfs_root *root,
+                                    struct page *locked_page,
+                                    u64 start, u64 end, int *page_started,
+                                    unsigned long *nr_written,
+                                    int unlock)
 {
-       struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct btrfs_trans_handle *trans;
        u64 alloc_hint = 0;
        u64 num_bytes;
        unsigned long ram_size;
@@ -823,25 +835,10 @@ static noinline int cow_file_range(struct inode *inode,
        int ret = 0;
 
        BUG_ON(btrfs_is_free_space_inode(inode));
-       trans = btrfs_join_transaction(root);
-       if (IS_ERR(trans)) {
-               extent_clear_unlock_delalloc(inode,
-                            &BTRFS_I(inode)->io_tree,
-                            start, end, locked_page,
-                            EXTENT_CLEAR_UNLOCK_PAGE |
-                            EXTENT_CLEAR_UNLOCK |
-                            EXTENT_CLEAR_DELALLOC |
-                            EXTENT_CLEAR_DIRTY |
-                            EXTENT_SET_WRITEBACK |
-                            EXTENT_END_WRITEBACK);
-               return PTR_ERR(trans);
-       }
-       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
        num_bytes = (end - start + blocksize) & ~(blocksize - 1);
        num_bytes = max(blocksize,  num_bytes);
        disk_num_bytes = num_bytes;
-       ret = 0;
 
        /* if this is a small write inside eof, kick off defrag */
        if (num_bytes < 64 * 1024 &&
@@ -897,15 +894,22 @@ static noinline int cow_file_range(struct inode *inode,
                em->orig_start = em->start;
                ram_size = ins.offset;
                em->len = ins.offset;
+               em->mod_start = em->start;
+               em->mod_len = em->len;
 
                em->block_start = ins.objectid;
                em->block_len = ins.offset;
+               em->orig_block_len = ins.offset;
                em->bdev = root->fs_info->fs_devices->latest_bdev;
                set_bit(EXTENT_FLAG_PINNED, &em->flags);
+               em->generation = -1;
 
                while (1) {
                        write_lock(&em_tree->lock);
                        ret = add_extent_mapping(em_tree, em);
+                       if (!ret)
+                               list_move(&em->list,
+                                         &em_tree->modified_extents);
                        write_unlock(&em_tree->lock);
                        if (ret != -EEXIST) {
                                free_extent_map(em);
@@ -952,11 +956,9 @@ static noinline int cow_file_range(struct inode *inode,
                alloc_hint = ins.objectid + ins.offset;
                start += cur_alloc_size;
        }
-       ret = 0;
 out:
-       btrfs_end_transaction(trans, root);
-
        return ret;
+
 out_unlock:
        extent_clear_unlock_delalloc(inode,
                     &BTRFS_I(inode)->io_tree,
@@ -971,6 +973,39 @@ out_unlock:
        goto out;
 }
 
+static noinline int cow_file_range(struct inode *inode,
+                                  struct page *locked_page,
+                                  u64 start, u64 end, int *page_started,
+                                  unsigned long *nr_written,
+                                  int unlock)
+{
+       struct btrfs_trans_handle *trans;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       int ret;
+
+       trans = btrfs_join_transaction(root);
+       if (IS_ERR(trans)) {
+               extent_clear_unlock_delalloc(inode,
+                            &BTRFS_I(inode)->io_tree,
+                            start, end, locked_page,
+                            EXTENT_CLEAR_UNLOCK_PAGE |
+                            EXTENT_CLEAR_UNLOCK |
+                            EXTENT_CLEAR_DELALLOC |
+                            EXTENT_CLEAR_DIRTY |
+                            EXTENT_SET_WRITEBACK |
+                            EXTENT_END_WRITEBACK);
+               return PTR_ERR(trans);
+       }
+       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
+
+       ret = __cow_file_range(trans, inode, root, locked_page, start, end,
+                              page_started, nr_written, unlock);
+
+       btrfs_end_transaction(trans, root);
+
+       return ret;
+}
+
 /*
  * work queue call back to started compression on a file and pages
  */
@@ -1126,6 +1161,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
        u64 extent_offset;
        u64 disk_bytenr;
        u64 num_bytes;
+       u64 disk_num_bytes;
        int extent_type;
        int ret, err;
        int type;
@@ -1228,6 +1264,8 @@ next_slot:
                        extent_offset = btrfs_file_extent_offset(leaf, fi);
                        extent_end = found_key.offset +
                                btrfs_file_extent_num_bytes(leaf, fi);
+                       disk_num_bytes =
+                               btrfs_file_extent_disk_num_bytes(leaf, fi);
                        if (extent_end <= start) {
                                path->slots[0]++;
                                goto next_slot;
@@ -1281,9 +1319,9 @@ out_check:
 
                btrfs_release_path(path);
                if (cow_start != (u64)-1) {
-                       ret = cow_file_range(inode, locked_page, cow_start,
-                                       found_key.offset - 1, page_started,
-                                       nr_written, 1);
+                       ret = __cow_file_range(trans, inode, root, locked_page,
+                                              cow_start, found_key.offset - 1,
+                                              page_started, nr_written, 1);
                        if (ret) {
                                btrfs_abort_transaction(trans, root, ret);
                                goto error;
@@ -1298,16 +1336,23 @@ out_check:
                        em = alloc_extent_map();
                        BUG_ON(!em); /* -ENOMEM */
                        em->start = cur_offset;
-                       em->orig_start = em->start;
+                       em->orig_start = found_key.offset - extent_offset;
                        em->len = num_bytes;
                        em->block_len = num_bytes;
                        em->block_start = disk_bytenr;
+                       em->orig_block_len = disk_num_bytes;
                        em->bdev = root->fs_info->fs_devices->latest_bdev;
+                       em->mod_start = em->start;
+                       em->mod_len = em->len;
                        set_bit(EXTENT_FLAG_PINNED, &em->flags);
-                       set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
+                       set_bit(EXTENT_FLAG_FILLING, &em->flags);
+                       em->generation = -1;
                        while (1) {
                                write_lock(&em_tree->lock);
                                ret = add_extent_mapping(em_tree, em);
+                               if (!ret)
+                                       list_move(&em->list,
+                                                 &em_tree->modified_extents);
                                write_unlock(&em_tree->lock);
                                if (ret != -EEXIST) {
                                        free_extent_map(em);
@@ -1352,8 +1397,9 @@ out_check:
        }
 
        if (cow_start != (u64)-1) {
-               ret = cow_file_range(inode, locked_page, cow_start, end,
-                                    page_started, nr_written, 1);
+               ret = __cow_file_range(trans, inode, root, locked_page,
+                                      cow_start, end,
+                                      page_started, nr_written, 1);
                if (ret) {
                        btrfs_abort_transaction(trans, root, ret);
                        goto error;
@@ -1470,7 +1516,8 @@ static void btrfs_set_bit_hook(struct inode *inode,
 
                spin_lock(&root->fs_info->delalloc_lock);
                BTRFS_I(inode)->delalloc_bytes += len;
-               root->fs_info->delalloc_bytes += len;
+               __percpu_counter_add(&root->fs_info->delalloc_bytes, len,
+                                    root->fs_info->delalloc_batch);
                if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
                        list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
                                      &root->fs_info->delalloc_inodes);
@@ -1511,7 +1558,8 @@ static void btrfs_clear_bit_hook(struct inode *inode,
                        btrfs_free_reserved_data_space(inode, len);
 
                spin_lock(&root->fs_info->delalloc_lock);
-               root->fs_info->delalloc_bytes -= len;
+               __percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
+                                    root->fs_info->delalloc_batch);
                BTRFS_I(inode)->delalloc_bytes -= len;
 
                if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
@@ -1531,7 +1579,6 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
                         unsigned long bio_flags)
 {
        struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
-       struct btrfs_mapping_tree *map_tree;
        u64 logical = (u64)bio->bi_sector << 9;
        u64 length = 0;
        u64 map_length;
@@ -1541,11 +1588,10 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
                return 0;
 
        length = bio->bi_size;
-       map_tree = &root->fs_info->mapping_tree;
        map_length = length;
-       ret = btrfs_map_block(map_tree, READ, logical,
+       ret = btrfs_map_block(root->fs_info, READ, logical,
                              &map_length, NULL, 0);
-       /* Will always return 0 or 1 with map_multi == NULL */
+       /* Will always return 0 with map_multi == NULL */
        BUG_ON(ret < 0);
        if (map_length < length + size)
                return 1;
@@ -1586,7 +1632,12 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
                          u64 bio_offset)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       return btrfs_map_bio(root, rw, bio, mirror_num, 1);
+       int ret;
+
+       ret = btrfs_map_bio(root, rw, bio, mirror_num, 1);
+       if (ret)
+               bio_endio(bio, ret);
+       return ret;
 }
 
 /*
@@ -1601,6 +1652,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
        int ret = 0;
        int skip_sum;
        int metadata = 0;
+       int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
 
        skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
@@ -1610,31 +1662,43 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
        if (!(rw & REQ_WRITE)) {
                ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
                if (ret)
-                       return ret;
+                       goto out;
 
                if (bio_flags & EXTENT_BIO_COMPRESSED) {
-                       return btrfs_submit_compressed_read(inode, bio,
-                                                   mirror_num, bio_flags);
+                       ret = btrfs_submit_compressed_read(inode, bio,
+                                                          mirror_num,
+                                                          bio_flags);
+                       goto out;
                } else if (!skip_sum) {
                        ret = btrfs_lookup_bio_sums(root, inode, bio, NULL);
                        if (ret)
-                               return ret;
+                               goto out;
                }
                goto mapit;
-       } else if (!skip_sum) {
+       } else if (async && !skip_sum) {
                /* csum items have already been cloned */
                if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
                        goto mapit;
                /* we're doing a write, do the async checksumming */
-               return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
+               ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
                                   inode, rw, bio, mirror_num,
                                   bio_flags, bio_offset,
                                   __btrfs_submit_bio_start,
                                   __btrfs_submit_bio_done);
+               goto out;
+       } else if (!skip_sum) {
+               ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
+               if (ret)
+                       goto out;
        }
 
 mapit:
-       return btrfs_map_bio(root, rw, bio, mirror_num, 0);
+       ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
+
+out:
+       if (ret < 0)
+               bio_endio(bio, ret);
+       return ret;
 }
 
 /*
@@ -1657,8 +1721,7 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
 int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
                              struct extent_state **cached_state)
 {
-       if ((end & (PAGE_CACHE_SIZE - 1)) == 0)
-               WARN_ON(1);
+       WARN_ON((end & (PAGE_CACHE_SIZE - 1)) == 0);
        return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
                                   cached_state, GFP_NOFS);
 }
@@ -1867,22 +1930,20 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 
        if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
                BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
-               ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
-               if (!ret) {
-                       if (nolock)
-                               trans = btrfs_join_transaction_nolock(root);
-                       else
-                               trans = btrfs_join_transaction(root);
-                       if (IS_ERR(trans)) {
-                               ret = PTR_ERR(trans);
-                               trans = NULL;
-                               goto out;
-                       }
-                       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-                       ret = btrfs_update_inode_fallback(trans, root, inode);
-                       if (ret) /* -ENOMEM or corruption */
-                               btrfs_abort_transaction(trans, root, ret);
+               btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+               if (nolock)
+                       trans = btrfs_join_transaction_nolock(root);
+               else
+                       trans = btrfs_join_transaction(root);
+               if (IS_ERR(trans)) {
+                       ret = PTR_ERR(trans);
+                       trans = NULL;
+                       goto out;
                }
+               trans->block_rsv = &root->fs_info->delalloc_block_rsv;
+               ret = btrfs_update_inode_fallback(trans, root, inode);
+               if (ret) /* -ENOMEM or corruption */
+                       btrfs_abort_transaction(trans, root, ret);
                goto out;
        }
 
@@ -1931,15 +1992,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
        add_pending_csums(trans, inode, ordered_extent->file_offset,
                          &ordered_extent->list);
 
-       ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
-       if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
-               ret = btrfs_update_inode_fallback(trans, root, inode);
-               if (ret) { /* -ENOMEM or corruption */
-                       btrfs_abort_transaction(trans, root, ret);
-                       goto out_unlock;
-               }
-       } else {
-               btrfs_set_inode_last_trans(trans, inode);
+       btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+       ret = btrfs_update_inode_fallback(trans, root, inode);
+       if (ret) { /* -ENOMEM or corruption */
+               btrfs_abort_transaction(trans, root, ret);
+               goto out_unlock;
        }
        ret = 0;
 out_unlock:
@@ -2013,7 +2070,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
 static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
                               struct extent_state *state, int mirror)
 {
-       size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
+       size_t offset = start - page_offset(page);
        struct inode *inode = page->mapping->host;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        char *kaddr;
@@ -2429,6 +2486,18 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                                continue;
                        }
                        nr_truncate++;
+
+                       /* 1 for the orphan item deletion. */
+                       trans = btrfs_start_transaction(root, 1);
+                       if (IS_ERR(trans)) {
+                               ret = PTR_ERR(trans);
+                               goto out;
+                       }
+                       ret = btrfs_orphan_add(trans, inode);
+                       btrfs_end_transaction(trans, root);
+                       if (ret)
+                               goto out;
+
                        ret = btrfs_truncate(inode);
                } else {
                        nr_unlink++;
@@ -2648,34 +2717,41 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
                            struct btrfs_inode_item *item,
                            struct inode *inode)
 {
-       btrfs_set_inode_uid(leaf, item, i_uid_read(inode));
-       btrfs_set_inode_gid(leaf, item, i_gid_read(inode));
-       btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
-       btrfs_set_inode_mode(leaf, item, inode->i_mode);
-       btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
+       struct btrfs_map_token token;
+
+       btrfs_init_map_token(&token);
+
+       btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
+       btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
+       btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size,
+                                  &token);
+       btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
+       btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
 
-       btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
-                              inode->i_atime.tv_sec);
-       btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
-                               inode->i_atime.tv_nsec);
+       btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
+                                    inode->i_atime.tv_sec, &token);
+       btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
+                                     inode->i_atime.tv_nsec, &token);
 
-       btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
-                              inode->i_mtime.tv_sec);
-       btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
-                               inode->i_mtime.tv_nsec);
+       btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
+                                    inode->i_mtime.tv_sec, &token);
+       btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
+                                     inode->i_mtime.tv_nsec, &token);
 
-       btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
-                              inode->i_ctime.tv_sec);
-       btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
-                               inode->i_ctime.tv_nsec);
+       btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
+                                    inode->i_ctime.tv_sec, &token);
+       btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
+                                     inode->i_ctime.tv_nsec, &token);
 
-       btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode));
-       btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation);
-       btrfs_set_inode_sequence(leaf, item, inode->i_version);
-       btrfs_set_inode_transid(leaf, item, trans->transid);
-       btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
-       btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
-       btrfs_set_inode_block_group(leaf, item, 0);
+       btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
+                                    &token);
+       btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
+                                        &token);
+       btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token);
+       btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
+       btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
+       btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
+       btrfs_set_token_inode_block_group(leaf, item, 0, &token);
 }
 
 /*
@@ -3074,7 +3150,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
        struct btrfs_trans_handle *trans;
        struct inode *inode = dentry->d_inode;
        int ret;
-       unsigned long nr = 0;
 
        trans = __unlink_start_trans(dir, dentry);
        if (IS_ERR(trans))
@@ -3094,9 +3169,8 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
        }
 
 out:
-       nr = trans->blocks_used;
        __unlink_end_trans(trans, root);
-       btrfs_btree_balance_dirty(root, nr);
+       btrfs_btree_balance_dirty(root);
        return ret;
 }
 
@@ -3186,7 +3260,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
        int err = 0;
        struct btrfs_root *root = BTRFS_I(dir)->root;
        struct btrfs_trans_handle *trans;
-       unsigned long nr = 0;
 
        if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
                return -ENOTEMPTY;
@@ -3215,9 +3288,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
        if (!err)
                btrfs_i_size_write(inode, 0);
 out:
-       nr = trans->blocks_used;
        __unlink_end_trans(trans, root);
-       btrfs_btree_balance_dirty(root, nr);
+       btrfs_btree_balance_dirty(root);
 
        return err;
 }
@@ -3497,11 +3569,11 @@ int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
        if (ret)
                goto out;
 
-       ret = -ENOMEM;
 again:
        page = find_or_create_page(mapping, index, mask);
        if (!page) {
                btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
+               ret = -ENOMEM;
                goto out;
        }
 
@@ -3550,7 +3622,6 @@ again:
                goto out_unlock;
        }
 
-       ret = 0;
        if (offset != PAGE_CACHE_SIZE) {
                if (!len)
                        len = PAGE_CACHE_SIZE - offset;
@@ -3621,6 +3692,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                                block_end - cur_offset, 0);
                if (IS_ERR(em)) {
                        err = PTR_ERR(em);
+                       em = NULL;
                        break;
                }
                last_byte = min(extent_map_end(em), block_end);
@@ -3668,6 +3740,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
 
                        hole_em->block_start = EXTENT_MAP_HOLE;
                        hole_em->block_len = 0;
+                       hole_em->orig_block_len = 0;
                        hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
                        hole_em->compress_type = BTRFS_COMPRESS_NONE;
                        hole_em->generation = trans->transid;
@@ -3703,16 +3776,27 @@ next:
        return err;
 }
 
-static int btrfs_setsize(struct inode *inode, loff_t newsize)
+static int btrfs_setsize(struct inode *inode, struct iattr *attr)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_trans_handle *trans;
        loff_t oldsize = i_size_read(inode);
+       loff_t newsize = attr->ia_size;
+       int mask = attr->ia_valid;
        int ret;
 
        if (newsize == oldsize)
                return 0;
 
+       /*
+        * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
+        * special case where we need to update the times despite not having
+        * these flags set.  For all other operations the VFS set these flags
+        * explicitly if it wants a timestamp update.
+        */
+       if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME))))
+               inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
+
        if (newsize > oldsize) {
                truncate_pagecache(inode, oldsize, newsize);
                ret = btrfs_cont_expand(inode, oldsize, newsize);
@@ -3738,9 +3822,34 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
                        set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
                                &BTRFS_I(inode)->runtime_flags);
 
+               /*
+                * 1 for the orphan item we're going to add
+                * 1 for the orphan item deletion.
+                */
+               trans = btrfs_start_transaction(root, 2);
+               if (IS_ERR(trans))
+                       return PTR_ERR(trans);
+
+               /*
+                * We need to do this in case we fail at _any_ point during the
+                * actual truncate.  Once we do the truncate_setsize we could
+                * invalidate pages which forces any outstanding ordered io to
+                * be instantly completed which will give us extents that need
+                * to be truncated.  If we fail to get an orphan inode down we
+                * could have left over extents that were never meant to live,
+                * so we need to garuntee from this point on that everything
+                * will be consistent.
+                */
+               ret = btrfs_orphan_add(trans, inode);
+               btrfs_end_transaction(trans, root);
+               if (ret)
+                       return ret;
+
                /* we don't support swapfiles, so vmtruncate shouldn't fail */
                truncate_setsize(inode, newsize);
                ret = btrfs_truncate(inode);
+               if (ret && inode->i_nlink)
+                       btrfs_orphan_del(NULL, inode);
        }
 
        return ret;
@@ -3760,7 +3869,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
                return err;
 
        if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
-               err = btrfs_setsize(inode, attr->ia_size);
+               err = btrfs_setsize(inode, attr);
                if (err)
                        return err;
        }
@@ -3783,7 +3892,6 @@ void btrfs_evict_inode(struct inode *inode)
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_block_rsv *rsv, *global_rsv;
        u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
-       unsigned long nr;
        int ret;
 
        trace_btrfs_inode_evict(inode);
@@ -3811,6 +3919,12 @@ void btrfs_evict_inode(struct inode *inode)
                goto no_delete;
        }
 
+       ret = btrfs_commit_inode_delayed_inode(inode);
+       if (ret) {
+               btrfs_orphan_del(NULL, inode);
+               goto no_delete;
+       }
+
        rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
        if (!rsv) {
                btrfs_orphan_del(NULL, inode);
@@ -3829,7 +3943,8 @@ void btrfs_evict_inode(struct inode *inode)
         * inode item when doing the truncate.
         */
        while (1) {
-               ret = btrfs_block_rsv_refill_noflush(root, rsv, min_size);
+               ret = btrfs_block_rsv_refill(root, rsv, min_size,
+                                            BTRFS_RESERVE_FLUSH_LIMIT);
 
                /*
                 * Try and steal from the global reserve since we will
@@ -3847,7 +3962,7 @@ void btrfs_evict_inode(struct inode *inode)
                        goto no_delete;
                }
 
-               trans = btrfs_start_transaction_noflush(root, 1);
+               trans = btrfs_join_transaction(root);
                if (IS_ERR(trans)) {
                        btrfs_orphan_del(NULL, inode);
                        btrfs_free_block_rsv(root, rsv);
@@ -3861,13 +3976,9 @@ void btrfs_evict_inode(struct inode *inode)
                        break;
 
                trans->block_rsv = &root->fs_info->trans_block_rsv;
-               ret = btrfs_update_inode(trans, root, inode);
-               BUG_ON(ret);
-
-               nr = trans->blocks_used;
                btrfs_end_transaction(trans, root);
                trans = NULL;
-               btrfs_btree_balance_dirty(root, nr);
+               btrfs_btree_balance_dirty(root);
        }
 
        btrfs_free_block_rsv(root, rsv);
@@ -3883,9 +3994,8 @@ void btrfs_evict_inode(struct inode *inode)
              root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
                btrfs_return_ino(root, btrfs_ino(inode));
 
-       nr = trans->blocks_used;
        btrfs_end_transaction(trans, root);
-       btrfs_btree_balance_dirty(root, nr);
+       btrfs_btree_balance_dirty(root);
 no_delete:
        clear_inode(inode);
        return;
@@ -4775,8 +4885,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        if (S_ISREG(mode)) {
                if (btrfs_test_opt(root, NODATASUM))
                        BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
-               if (btrfs_test_opt(root, NODATACOW) ||
-                   (BTRFS_I(dir)->flags & BTRFS_INODE_NODATACOW))
+               if (btrfs_test_opt(root, NODATACOW))
                        BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
        }
 
@@ -4842,7 +4951,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
        ret = btrfs_insert_dir_item(trans, root, name, name_len,
                                    parent_inode, &key,
                                    btrfs_inode_type(inode), index);
-       if (ret == -EEXIST)
+       if (ret == -EEXIST || ret == -EOVERFLOW)
                goto fail_dir_item;
        else if (ret) {
                btrfs_abort_transaction(trans, root, ret);
@@ -4897,7 +5006,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
        int err;
        int drop_inode = 0;
        u64 objectid;
-       unsigned long nr = 0;
        u64 index = 0;
 
        if (!new_valid_dev(rdev))
@@ -4947,9 +5055,8 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
                d_instantiate(dentry, inode);
        }
 out_unlock:
-       nr = trans->blocks_used;
        btrfs_end_transaction(trans, root);
-       btrfs_btree_balance_dirty(root, nr);
+       btrfs_btree_balance_dirty(root);
        if (drop_inode) {
                inode_dec_link_count(inode);
                iput(inode);
@@ -4963,9 +5070,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(dir)->root;
        struct inode *inode = NULL;
-       int drop_inode = 0;
+       int drop_inode_on_err = 0;
        int err;
-       unsigned long nr = 0;
        u64 objectid;
        u64 index = 0;
 
@@ -4989,12 +5095,15 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
                err = PTR_ERR(inode);
                goto out_unlock;
        }
+       drop_inode_on_err = 1;
 
        err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
-       if (err) {
-               drop_inode = 1;
+       if (err)
+               goto out_unlock;
+
+       err = btrfs_update_inode(trans, root, inode);
+       if (err)
                goto out_unlock;
-       }
 
        /*
        * If the active LSM wants to access the inode during
@@ -5007,21 +5116,20 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
 
        err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
        if (err)
-               drop_inode = 1;
-       else {
-               inode->i_mapping->a_ops = &btrfs_aops;
-               inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
-               BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
-               d_instantiate(dentry, inode);
-       }
+               goto out_unlock;
+
+       inode->i_mapping->a_ops = &btrfs_aops;
+       inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+       BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+       d_instantiate(dentry, inode);
+
 out_unlock:
-       nr = trans->blocks_used;
        btrfs_end_transaction(trans, root);
-       if (drop_inode) {
+       if (err && drop_inode_on_err) {
                inode_dec_link_count(inode);
                iput(inode);
        }
-       btrfs_btree_balance_dirty(root, nr);
+       btrfs_btree_balance_dirty(root);
        return err;
 }
 
@@ -5032,7 +5140,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
        struct btrfs_root *root = BTRFS_I(dir)->root;
        struct inode *inode = old_dentry->d_inode;
        u64 index;
-       unsigned long nr = 0;
        int err;
        int drop_inode = 0;
 
@@ -5062,6 +5169,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
        inode_inc_iversion(inode);
        inode->i_ctime = CURRENT_TIME;
        ihold(inode);
+       set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
 
        err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
 
@@ -5076,14 +5184,13 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
                btrfs_log_new_name(trans, inode, NULL, parent);
        }
 
-       nr = trans->blocks_used;
        btrfs_end_transaction(trans, root);
 fail:
        if (drop_inode) {
                inode_dec_link_count(inode);
                iput(inode);
        }
-       btrfs_btree_balance_dirty(root, nr);
+       btrfs_btree_balance_dirty(root);
        return err;
 }
 
@@ -5096,7 +5203,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
        int drop_on_err = 0;
        u64 objectid = 0;
        u64 index = 0;
-       unsigned long nr = 1;
 
        /*
         * 2 items for inode and ref
@@ -5142,11 +5248,10 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
        drop_on_err = 0;
 
 out_fail:
-       nr = trans->blocks_used;
        btrfs_end_transaction(trans, root);
        if (drop_on_err)
                iput(inode);
-       btrfs_btree_balance_dirty(root, nr);
+       btrfs_btree_balance_dirty(root);
        return err;
 }
 
@@ -5340,6 +5445,7 @@ again:
                if (start + len <= found_key.offset)
                        goto not_found;
                em->start = start;
+               em->orig_start = start;
                em->len = found_key.offset - start;
                goto not_found_em;
        }
@@ -5350,6 +5456,8 @@ again:
                em->len = extent_end - extent_start;
                em->orig_start = extent_start -
                                 btrfs_file_extent_offset(leaf, item);
+               em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf,
+                                                                     item);
                bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
                if (bytenr == 0) {
                        em->block_start = EXTENT_MAP_HOLE;
@@ -5359,8 +5467,7 @@ again:
                        set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
                        em->compress_type = compress_type;
                        em->block_start = bytenr;
-                       em->block_len = btrfs_file_extent_disk_num_bytes(leaf,
-                                                                        item);
+                       em->block_len = em->orig_block_len;
                } else {
                        bytenr += btrfs_file_extent_offset(leaf, item);
                        em->block_start = bytenr;
@@ -5390,7 +5497,8 @@ again:
                em->start = extent_start + extent_offset;
                em->len = (copy_size + root->sectorsize - 1) &
                        ~((u64)root->sectorsize - 1);
-               em->orig_start = EXTENT_MAP_INLINE;
+               em->orig_block_len = em->len;
+               em->orig_start = em->start;
                if (compress_type) {
                        set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
                        em->compress_type = compress_type;
@@ -5439,11 +5547,11 @@ again:
                                    extent_map_end(em) - 1, NULL, GFP_NOFS);
                goto insert;
        } else {
-               printk(KERN_ERR "btrfs unknown found_type %d\n", found_type);
-               WARN_ON(1);
+               WARN(1, KERN_ERR "btrfs unknown found_type %d\n", found_type);
        }
 not_found:
        em->start = start;
+       em->orig_start = start;
        em->len = len;
 not_found_em:
        em->block_start = EXTENT_MAP_HOLE;
@@ -5539,10 +5647,13 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
                return em;
        if (em) {
                /*
-                * if our em maps to a hole, there might
-                * actually be delalloc bytes behind it
+                * if our em maps to
+                * -  a hole or
+                * -  a pre-alloc extent,
+                * there might actually be delalloc bytes behind it.
                 */
-               if (em->block_start != EXTENT_MAP_HOLE)
+               if (em->block_start != EXTENT_MAP_HOLE &&
+                   !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
                        return em;
                else
                        hole_em = em;
@@ -5624,6 +5735,8 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
                         */
                        em->block_start = hole_em->block_start;
                        em->block_len = hole_len;
+                       if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
+                               set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
                } else {
                        em->start = range_start;
                        em->len = found;
@@ -5645,38 +5758,19 @@ out:
 }
 
 static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
-                                                 struct extent_map *em,
                                                  u64 start, u64 len)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_trans_handle *trans;
-       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct extent_map *em;
        struct btrfs_key ins;
        u64 alloc_hint;
        int ret;
-       bool insert = false;
-
-       /*
-        * Ok if the extent map we looked up is a hole and is for the exact
-        * range we want, there is no reason to allocate a new one, however if
-        * it is not right then we need to free this one and drop the cache for
-        * our range.
-        */
-       if (em->block_start != EXTENT_MAP_HOLE || em->start != start ||
-           em->len != len) {
-               free_extent_map(em);
-               em = NULL;
-               insert = true;
-               btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
-       }
 
        trans = btrfs_join_transaction(root);
        if (IS_ERR(trans))
                return ERR_CAST(trans);
 
-       if (start <= BTRFS_I(inode)->disk_i_size && len < 64 * 1024)
-               btrfs_add_inode_defrag(trans, inode);
-
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
        alloc_hint = get_extent_allocation_hint(inode, start, len);
@@ -5687,37 +5781,10 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
                goto out;
        }
 
-       if (!em) {
-               em = alloc_extent_map();
-               if (!em) {
-                       em = ERR_PTR(-ENOMEM);
-                       goto out;
-               }
-       }
-
-       em->start = start;
-       em->orig_start = em->start;
-       em->len = ins.offset;
-
-       em->block_start = ins.objectid;
-       em->block_len = ins.offset;
-       em->bdev = root->fs_info->fs_devices->latest_bdev;
-
-       /*
-        * We need to do this because if we're using the original em we searched
-        * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that.
-        */
-       em->flags = 0;
-       set_bit(EXTENT_FLAG_PINNED, &em->flags);
-
-       while (insert) {
-               write_lock(&em_tree->lock);
-               ret = add_extent_mapping(em_tree, em);
-               write_unlock(&em_tree->lock);
-               if (ret != -EEXIST)
-                       break;
-               btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0);
-       }
+       em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
+                             ins.offset, ins.offset, 0);
+       if (IS_ERR(em))
+               goto out;
 
        ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
                                           ins.offset, ins.offset, 0);
@@ -5894,7 +5961,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
 static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
                                           u64 len, u64 orig_start,
                                           u64 block_start, u64 block_len,
-                                          int type)
+                                          u64 orig_block_len, int type)
 {
        struct extent_map_tree *em_tree;
        struct extent_map *em;
@@ -5908,19 +5975,26 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
 
        em->start = start;
        em->orig_start = orig_start;
+       em->mod_start = start;
+       em->mod_len = len;
        em->len = len;
        em->block_len = block_len;
        em->block_start = block_start;
        em->bdev = root->fs_info->fs_devices->latest_bdev;
+       em->orig_block_len = orig_block_len;
+       em->generation = -1;
        set_bit(EXTENT_FLAG_PINNED, &em->flags);
        if (type == BTRFS_ORDERED_PREALLOC)
-               set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
+               set_bit(EXTENT_FLAG_FILLING, &em->flags);
 
        do {
                btrfs_drop_extent_cache(inode, em->start,
                                em->start + em->len - 1, 0);
                write_lock(&em_tree->lock);
                ret = add_extent_mapping(em_tree, em);
+               if (!ret)
+                       list_move(&em->list,
+                                 &em_tree->modified_extents);
                write_unlock(&em_tree->lock);
        } while (ret == -EEXIST);
 
@@ -6047,13 +6121,15 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                        goto must_cow;
 
                if (can_nocow_odirect(trans, inode, start, len) == 1) {
-                       u64 orig_start = em->start;
+                       u64 orig_start = em->orig_start;
+                       u64 orig_block_len = em->orig_block_len;
 
                        if (type == BTRFS_ORDERED_PREALLOC) {
                                free_extent_map(em);
                                em = create_pinned_em(inode, start, len,
                                                       orig_start,
-                                                      block_start, len, type);
+                                                      block_start, len,
+                                                      orig_block_len, type);
                                if (IS_ERR(em)) {
                                        btrfs_end_transaction(trans, root);
                                        goto unlock_err;
@@ -6077,7 +6153,8 @@ must_cow:
         * it above
         */
        len = bh_result->b_size;
-       em = btrfs_new_extent_direct(inode, em, start, len);
+       free_extent_map(em);
+       em = btrfs_new_extent_direct(inode, start, len);
        if (IS_ERR(em)) {
                ret = PTR_ERR(em);
                goto unlock_err;
@@ -6318,6 +6395,9 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret;
 
+       if (async_submit)
+               async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
+
        bio_get(bio);
 
        if (!write) {
@@ -6362,7 +6442,6 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 {
        struct inode *inode = dip->inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
        struct bio *bio;
        struct bio *orig_bio = dip->orig_bio;
        struct bio_vec *bvec = orig_bio->bi_io_vec;
@@ -6375,7 +6454,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
        int async_submit = 0;
 
        map_length = orig_bio->bi_size;
-       ret = btrfs_map_block(map_tree, READ, start_sector << 9,
+       ret = btrfs_map_block(root->fs_info, READ, start_sector << 9,
                              &map_length, NULL, 0);
        if (ret) {
                bio_put(orig_bio);
@@ -6429,7 +6508,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
                        bio->bi_end_io = btrfs_end_dio_bio;
 
                        map_length = orig_bio->bi_size;
-                       ret = btrfs_map_block(map_tree, READ, start_sector << 9,
+                       ret = btrfs_map_block(root->fs_info, READ,
+                                             start_sector << 9,
                                              &map_length, NULL, 0);
                        if (ret) {
                                bio_put(bio);
@@ -6582,9 +6662,17 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
                   btrfs_submit_direct, 0);
 }
 
+#define BTRFS_FIEMAP_FLAGS     (FIEMAP_FLAG_SYNC)
+
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                __u64 start, __u64 len)
 {
+       int     ret;
+
+       ret = fiemap_check_flags(fieinfo, BTRFS_FIEMAP_FLAGS);
+       if (ret)
+               return ret;
+
        return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
 }
 
@@ -6675,8 +6763,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
                return;
        }
        lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
-       ordered = btrfs_lookup_ordered_extent(inode,
-                                          page_offset(page));
+       ordered = btrfs_lookup_ordered_extent(inode, page_offset(page));
        if (ordered) {
                /*
                 * IO on this page will never be started, so we need
@@ -6855,7 +6942,6 @@ static int btrfs_truncate(struct inode *inode)
        int ret;
        int err = 0;
        struct btrfs_trans_handle *trans;
-       unsigned long nr;
        u64 mask = root->sectorsize - 1;
        u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
 
@@ -6910,11 +6996,9 @@ static int btrfs_truncate(struct inode *inode)
 
        /*
         * 1 for the truncate slack space
-        * 1 for the orphan item we're going to add
-        * 1 for the orphan item deletion
         * 1 for updating the inode.
         */
-       trans = btrfs_start_transaction(root, 4);
+       trans = btrfs_start_transaction(root, 2);
        if (IS_ERR(trans)) {
                err = PTR_ERR(trans);
                goto out;
@@ -6925,12 +7009,6 @@ static int btrfs_truncate(struct inode *inode)
                                      min_size);
        BUG_ON(ret);
 
-       ret = btrfs_orphan_add(trans, inode);
-       if (ret) {
-               btrfs_end_transaction(trans, root);
-               goto out;
-       }
-
        /*
         * setattr is responsible for setting the ordered_data_close flag,
         * but that is only tested during the last file release.  That
@@ -6978,9 +7056,8 @@ static int btrfs_truncate(struct inode *inode)
                        break;
                }
 
-               nr = trans->blocks_used;
                btrfs_end_transaction(trans, root);
-               btrfs_btree_balance_dirty(root, nr);
+               btrfs_btree_balance_dirty(root);
 
                trans = btrfs_start_transaction(root, 2);
                if (IS_ERR(trans)) {
@@ -7000,12 +7077,6 @@ static int btrfs_truncate(struct inode *inode)
                ret = btrfs_orphan_del(trans, inode);
                if (ret)
                        err = ret;
-       } else if (ret && inode->i_nlink > 0) {
-               /*
-                * Failed to do the truncate, remove us from the in memory
-                * orphan list.
-                */
-               ret = btrfs_orphan_del(NULL, inode);
        }
 
        if (trans) {
@@ -7014,9 +7085,8 @@ static int btrfs_truncate(struct inode *inode)
                if (ret && !err)
                        err = ret;
 
-               nr = trans->blocks_used;
                ret = btrfs_end_transaction(trans, root);
-               btrfs_btree_balance_dirty(root, nr);
+               btrfs_btree_balance_dirty(root);
        }
 
 out:
@@ -7093,6 +7163,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
        ei->io_tree.track_uptodate = 1;
        ei->io_failure_tree.track_uptodate = 1;
+       atomic_set(&ei->sync_writers, 0);
        mutex_init(&ei->log_mutex);
        mutex_init(&ei->delalloc_mutex);
        btrfs_ordered_inode_tree_init(&ei->ordered_tree);
@@ -7203,6 +7274,8 @@ void btrfs_destroy_cachep(void)
                kmem_cache_destroy(btrfs_path_cachep);
        if (btrfs_free_space_cachep)
                kmem_cache_destroy(btrfs_free_space_cachep);
+       if (btrfs_delalloc_work_cachep)
+               kmem_cache_destroy(btrfs_delalloc_work_cachep);
 }
 
 int btrfs_init_cachep(void)
@@ -7237,6 +7310,13 @@ int btrfs_init_cachep(void)
        if (!btrfs_free_space_cachep)
                goto fail;
 
+       btrfs_delalloc_work_cachep = kmem_cache_create("btrfs_delalloc_work",
+                       sizeof(struct btrfs_delalloc_work), 0,
+                       SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+                       NULL);
+       if (!btrfs_delalloc_work_cachep)
+               goto fail;
+
        return 0;
 fail:
        btrfs_destroy_cachep();
@@ -7308,6 +7388,28 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (S_ISDIR(old_inode->i_mode) && new_inode &&
            new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
                return -ENOTEMPTY;
+
+
+       /* check for collisions, even if the  name isn't there */
+       ret = btrfs_check_dir_item_collision(root, new_dir->i_ino,
+                            new_dentry->d_name.name,
+                            new_dentry->d_name.len);
+
+       if (ret) {
+               if (ret == -EEXIST) {
+                       /* we shouldn't get
+                        * eexist without a new_inode */
+                       if (!new_inode) {
+                               WARN_ON(1);
+                               return ret;
+                       }
+               } else {
+                       /* maybe -EOVERFLOW */
+                       return ret;
+               }
+       }
+       ret = 0;
+
        /*
         * we're using rename to replace one file with another.
         * and the replacement file is large.  Start IO on it now so
@@ -7447,39 +7549,103 @@ out_notrans:
        return ret;
 }
 
+static void btrfs_run_delalloc_work(struct btrfs_work *work)
+{
+       struct btrfs_delalloc_work *delalloc_work;
+
+       delalloc_work = container_of(work, struct btrfs_delalloc_work,
+                                    work);
+       if (delalloc_work->wait)
+               btrfs_wait_ordered_range(delalloc_work->inode, 0, (u64)-1);
+       else
+               filemap_flush(delalloc_work->inode->i_mapping);
+
+       if (delalloc_work->delay_iput)
+               btrfs_add_delayed_iput(delalloc_work->inode);
+       else
+               iput(delalloc_work->inode);
+       complete(&delalloc_work->completion);
+}
+
+struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
+                                                   int wait, int delay_iput)
+{
+       struct btrfs_delalloc_work *work;
+
+       work = kmem_cache_zalloc(btrfs_delalloc_work_cachep, GFP_NOFS);
+       if (!work)
+               return NULL;
+
+       init_completion(&work->completion);
+       INIT_LIST_HEAD(&work->list);
+       work->inode = inode;
+       work->wait = wait;
+       work->delay_iput = delay_iput;
+       work->work.func = btrfs_run_delalloc_work;
+
+       return work;
+}
+
+void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
+{
+       wait_for_completion(&work->completion);
+       kmem_cache_free(btrfs_delalloc_work_cachep, work);
+}
+
 /*
  * some fairly slow code that needs optimization. This walks the list
  * of all the inodes with pending delalloc and forces them to disk.
  */
 int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
 {
-       struct list_head *head = &root->fs_info->delalloc_inodes;
        struct btrfs_inode *binode;
        struct inode *inode;
+       struct btrfs_delalloc_work *work, *next;
+       struct list_head works;
+       struct list_head splice;
+       int ret = 0;
 
        if (root->fs_info->sb->s_flags & MS_RDONLY)
                return -EROFS;
 
+       INIT_LIST_HEAD(&works);
+       INIT_LIST_HEAD(&splice);
+
        spin_lock(&root->fs_info->delalloc_lock);
-       while (!list_empty(head)) {
-               binode = list_entry(head->next, struct btrfs_inode,
+       list_splice_init(&root->fs_info->delalloc_inodes, &splice);
+       while (!list_empty(&splice)) {
+               binode = list_entry(splice.next, struct btrfs_inode,
                                    delalloc_inodes);
+
+               list_del_init(&binode->delalloc_inodes);
+
                inode = igrab(&binode->vfs_inode);
                if (!inode)
-                       list_del_init(&binode->delalloc_inodes);
+                       continue;
+
+               list_add_tail(&binode->delalloc_inodes,
+                             &root->fs_info->delalloc_inodes);
                spin_unlock(&root->fs_info->delalloc_lock);
-               if (inode) {
-                       filemap_flush(inode->i_mapping);
-                       if (delay_iput)
-                               btrfs_add_delayed_iput(inode);
-                       else
-                               iput(inode);
+
+               work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
+               if (unlikely(!work)) {
+                       ret = -ENOMEM;
+                       goto out;
                }
+               list_add_tail(&work->list, &works);
+               btrfs_queue_worker(&root->fs_info->flush_workers,
+                                  &work->work);
+
                cond_resched();
                spin_lock(&root->fs_info->delalloc_lock);
        }
        spin_unlock(&root->fs_info->delalloc_lock);
 
+       list_for_each_entry_safe(work, next, &works, list) {
+               list_del_init(&work->list);
+               btrfs_wait_and_free_delalloc_work(work);
+       }
+
        /* the filemap_flush will queue IO into the worker threads, but
         * we have to make sure the IO is actually started and that
         * ordered extents get created before we return
@@ -7493,6 +7659,18 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
        }
        atomic_dec(&root->fs_info->async_submit_draining);
        return 0;
+out:
+       list_for_each_entry_safe(work, next, &works, list) {
+               list_del_init(&work->list);
+               btrfs_wait_and_free_delalloc_work(work);
+       }
+
+       if (!list_empty_careful(&splice)) {
+               spin_lock(&root->fs_info->delalloc_lock);
+               list_splice_tail(&splice, &root->fs_info->delalloc_inodes);
+               spin_unlock(&root->fs_info->delalloc_lock);
+       }
+       return ret;
 }
 
 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
@@ -7512,7 +7690,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
        unsigned long ptr;
        struct btrfs_file_extent_item *ei;
        struct extent_buffer *leaf;
-       unsigned long nr = 0;
 
        name_len = strlen(symname) + 1;
        if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
@@ -7610,13 +7787,12 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 out_unlock:
        if (!err)
                d_instantiate(dentry, inode);
-       nr = trans->blocks_used;
        btrfs_end_transaction(trans, root);
        if (drop_inode) {
                inode_dec_link_count(inode);
                iput(inode);
        }
-       btrfs_btree_balance_dirty(root, nr);
+       btrfs_btree_balance_dirty(root);
        return err;
 }
 
@@ -7679,6 +7855,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
                em->len = ins.offset;
                em->block_start = ins.objectid;
                em->block_len = ins.offset;
+               em->orig_block_len = ins.offset;
                em->bdev = root->fs_info->fs_devices->latest_bdev;
                set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
                em->generation = trans->transid;