git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blobdiff - fs/btrfs/inode.c
btrfs: fix btrfs_cont_expand() freeing IS_ERR em
[mirror_ubuntu-bionic-kernel.git] / fs / btrfs / inode.c
index 5d1675a8c9e2afb7db7b3cf9457106a15231d441..3d2c64d4734ac73ee056d9b9a300c4e965fb9d61 100644 (file)
@@ -95,6 +95,10 @@ static noinline int cow_file_range(struct inode *inode,
                                   struct page *locked_page,
                                   u64 start, u64 end, int *page_started,
                                   unsigned long *nr_written, int unlock);
+static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
+                                          u64 len, u64 orig_start,
+                                          u64 block_start, u64 block_len,
+                                          u64 orig_block_len, int type);
 
 static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
                                     struct inode *inode,  struct inode *dir,
@@ -699,14 +703,19 @@ retry:
 
                em->block_start = ins.objectid;
                em->block_len = ins.offset;
+               em->orig_block_len = ins.offset;
                em->bdev = root->fs_info->fs_devices->latest_bdev;
                em->compress_type = async_extent->compress_type;
                set_bit(EXTENT_FLAG_PINNED, &em->flags);
                set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+               em->generation = -1;
 
                while (1) {
                        write_lock(&em_tree->lock);
                        ret = add_extent_mapping(em_tree, em);
+                       if (!ret)
+                               list_move(&em->list,
+                                         &em_tree->modified_extents);
                        write_unlock(&em_tree->lock);
                        if (ret != -EEXIST) {
                                free_extent_map(em);
@@ -886,12 +895,17 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
 
                em->block_start = ins.objectid;
                em->block_len = ins.offset;
+               em->orig_block_len = ins.offset;
                em->bdev = root->fs_info->fs_devices->latest_bdev;
                set_bit(EXTENT_FLAG_PINNED, &em->flags);
+               em->generation = -1;
 
                while (1) {
                        write_lock(&em_tree->lock);
                        ret = add_extent_mapping(em_tree, em);
+                       if (!ret)
+                               list_move(&em->list,
+                                         &em_tree->modified_extents);
                        write_unlock(&em_tree->lock);
                        if (ret != -EEXIST) {
                                free_extent_map(em);
@@ -1143,6 +1157,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
        u64 extent_offset;
        u64 disk_bytenr;
        u64 num_bytes;
+       u64 disk_num_bytes;
        int extent_type;
        int ret, err;
        int type;
@@ -1245,6 +1260,8 @@ next_slot:
                        extent_offset = btrfs_file_extent_offset(leaf, fi);
                        extent_end = found_key.offset +
                                btrfs_file_extent_num_bytes(leaf, fi);
+                       disk_num_bytes =
+                               btrfs_file_extent_disk_num_bytes(leaf, fi);
                        if (extent_end <= start) {
                                path->slots[0]++;
                                goto next_slot;
@@ -1315,16 +1332,21 @@ out_check:
                        em = alloc_extent_map();
                        BUG_ON(!em); /* -ENOMEM */
                        em->start = cur_offset;
-                       em->orig_start = em->start;
+                       em->orig_start = found_key.offset - extent_offset;
                        em->len = num_bytes;
                        em->block_len = num_bytes;
                        em->block_start = disk_bytenr;
+                       em->orig_block_len = disk_num_bytes;
                        em->bdev = root->fs_info->fs_devices->latest_bdev;
                        set_bit(EXTENT_FLAG_PINNED, &em->flags);
-                       set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
+                       set_bit(EXTENT_FLAG_FILLING, &em->flags);
+                       em->generation = -1;
                        while (1) {
                                write_lock(&em_tree->lock);
                                ret = add_extent_mapping(em_tree, em);
+                               if (!ret)
+                                       list_move(&em->list,
+                                                 &em_tree->modified_extents);
                                write_unlock(&em_tree->lock);
                                if (ret != -EEXIST) {
                                        free_extent_map(em);
@@ -1602,7 +1624,12 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
                          u64 bio_offset)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       return btrfs_map_bio(root, rw, bio, mirror_num, 1);
+       int ret;
+
+       ret = btrfs_map_bio(root, rw, bio, mirror_num, 1);
+       if (ret)
+               bio_endio(bio, ret);
+       return ret;
 }
 
 /*
@@ -1617,6 +1644,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
        int ret = 0;
        int skip_sum;
        int metadata = 0;
+       int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
 
        skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
@@ -1626,31 +1654,43 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
        if (!(rw & REQ_WRITE)) {
                ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
                if (ret)
-                       return ret;
+                       goto out;
 
                if (bio_flags & EXTENT_BIO_COMPRESSED) {
-                       return btrfs_submit_compressed_read(inode, bio,
-                                                   mirror_num, bio_flags);
+                       ret = btrfs_submit_compressed_read(inode, bio,
+                                                          mirror_num,
+                                                          bio_flags);
+                       goto out;
                } else if (!skip_sum) {
                        ret = btrfs_lookup_bio_sums(root, inode, bio, NULL);
                        if (ret)
-                               return ret;
+                               goto out;
                }
                goto mapit;
-       } else if (!skip_sum) {
+       } else if (async && !skip_sum) {
                /* csum items have already been cloned */
                if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
                        goto mapit;
                /* we're doing a write, do the async checksumming */
-               return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
+               ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
                                   inode, rw, bio, mirror_num,
                                   bio_flags, bio_offset,
                                   __btrfs_submit_bio_start,
                                   __btrfs_submit_bio_done);
+               goto out;
+       } else if (!skip_sum) {
+               ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
+               if (ret)
+                       goto out;
        }
 
 mapit:
-       return btrfs_map_bio(root, rw, bio, mirror_num, 0);
+       ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
+
+out:
+       if (ret < 0)
+               bio_endio(bio, ret);
+       return ret;
 }
 
 /*
@@ -1882,22 +1922,20 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 
        if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
                BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
-               ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
-               if (!ret) {
-                       if (nolock)
-                               trans = btrfs_join_transaction_nolock(root);
-                       else
-                               trans = btrfs_join_transaction(root);
-                       if (IS_ERR(trans)) {
-                               ret = PTR_ERR(trans);
-                               trans = NULL;
-                               goto out;
-                       }
-                       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-                       ret = btrfs_update_inode_fallback(trans, root, inode);
-                       if (ret) /* -ENOMEM or corruption */
-                               btrfs_abort_transaction(trans, root, ret);
+               btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+               if (nolock)
+                       trans = btrfs_join_transaction_nolock(root);
+               else
+                       trans = btrfs_join_transaction(root);
+               if (IS_ERR(trans)) {
+                       ret = PTR_ERR(trans);
+                       trans = NULL;
+                       goto out;
                }
+               trans->block_rsv = &root->fs_info->delalloc_block_rsv;
+               ret = btrfs_update_inode_fallback(trans, root, inode);
+               if (ret) /* -ENOMEM or corruption */
+                       btrfs_abort_transaction(trans, root, ret);
                goto out;
        }
 
@@ -1946,15 +1984,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
        add_pending_csums(trans, inode, ordered_extent->file_offset,
                          &ordered_extent->list);
 
-       ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
-       if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
-               ret = btrfs_update_inode_fallback(trans, root, inode);
-               if (ret) { /* -ENOMEM or corruption */
-                       btrfs_abort_transaction(trans, root, ret);
-                       goto out_unlock;
-               }
-       } else {
-               btrfs_set_inode_last_trans(trans, inode);
+       btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+       ret = btrfs_update_inode_fallback(trans, root, inode);
+       if (ret) { /* -ENOMEM or corruption */
+               btrfs_abort_transaction(trans, root, ret);
+               goto out_unlock;
        }
        ret = 0;
 out_unlock:
@@ -2444,6 +2478,18 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                                continue;
                        }
                        nr_truncate++;
+
+                       /* 1 for the orphan item deletion. */
+                       trans = btrfs_start_transaction(root, 1);
+                       if (IS_ERR(trans)) {
+                               ret = PTR_ERR(trans);
+                               goto out;
+                       }
+                       ret = btrfs_orphan_add(trans, inode);
+                       btrfs_end_transaction(trans, root);
+                       if (ret)
+                               goto out;
+
                        ret = btrfs_truncate(inode);
                } else {
                        nr_unlink++;
@@ -3508,11 +3554,11 @@ int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
        if (ret)
                goto out;
 
-       ret = -ENOMEM;
 again:
        page = find_or_create_page(mapping, index, mask);
        if (!page) {
                btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
+               ret = -ENOMEM;
                goto out;
        }
 
@@ -3561,7 +3607,6 @@ again:
                goto out_unlock;
        }
 
-       ret = 0;
        if (offset != PAGE_CACHE_SIZE) {
                if (!len)
                        len = PAGE_CACHE_SIZE - offset;
@@ -3632,6 +3677,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                                block_end - cur_offset, 0);
                if (IS_ERR(em)) {
                        err = PTR_ERR(em);
+                       em = NULL;
                        break;
                }
                last_byte = min(extent_map_end(em), block_end);
@@ -3679,6 +3725,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
 
                        hole_em->block_start = EXTENT_MAP_HOLE;
                        hole_em->block_len = 0;
+                       hole_em->orig_block_len = 0;
                        hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
                        hole_em->compress_type = BTRFS_COMPRESS_NONE;
                        hole_em->generation = trans->transid;
@@ -3749,9 +3796,34 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
                        set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
                                &BTRFS_I(inode)->runtime_flags);
 
+               /*
+                * 1 for the orphan item we're going to add
+                * 1 for the orphan item deletion.
+                */
+               trans = btrfs_start_transaction(root, 2);
+               if (IS_ERR(trans))
+                       return PTR_ERR(trans);
+
+               /*
+                * We need to do this in case we fail at _any_ point during the
+                * actual truncate.  Once we do the truncate_setsize we could
+                * invalidate pages which forces any outstanding ordered io to
+                * be instantly completed which will give us extents that need
+                * to be truncated.  If we fail to get an orphan inode down we
+                * could have left over extents that were never meant to live,
+                * so we need to guarantee from this point on that everything
+                * will be consistent.
+                */
+               ret = btrfs_orphan_add(trans, inode);
+               btrfs_end_transaction(trans, root);
+               if (ret)
+                       return ret;
+
                /* we don't support swapfiles, so vmtruncate shouldn't fail */
                truncate_setsize(inode, newsize);
                ret = btrfs_truncate(inode);
+               if (ret && inode->i_nlink)
+                       btrfs_orphan_del(NULL, inode);
        }
 
        return ret;
@@ -4784,8 +4856,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        if (S_ISREG(mode)) {
                if (btrfs_test_opt(root, NODATASUM))
                        BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
-               if (btrfs_test_opt(root, NODATACOW) ||
-                   (BTRFS_I(dir)->flags & BTRFS_INODE_NODATACOW))
+               if (btrfs_test_opt(root, NODATACOW))
                        BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
        }
 
@@ -4851,7 +4922,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
        ret = btrfs_insert_dir_item(trans, root, name, name_len,
                                    parent_inode, &key,
                                    btrfs_inode_type(inode), index);
-       if (ret == -EEXIST)
+       if (ret == -EEXIST || ret == -EOVERFLOW)
                goto fail_dir_item;
        else if (ret) {
                btrfs_abort_transaction(trans, root, ret);
@@ -4938,6 +5009,12 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
                goto out_unlock;
        }
 
+       err = btrfs_update_inode(trans, root, inode);
+       if (err) {
+               drop_inode = 1;
+               goto out_unlock;
+       }
+
        /*
        * If the active LSM wants to access the inode during
        * d_instantiate it needs these. Smack checks to see
@@ -4970,7 +5047,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(dir)->root;
        struct inode *inode = NULL;
-       int drop_inode = 0;
+       int drop_inode_on_err = 0;
        int err;
        u64 objectid;
        u64 index = 0;
@@ -4995,12 +5072,15 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
                err = PTR_ERR(inode);
                goto out_unlock;
        }
+       drop_inode_on_err = 1;
 
        err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
-       if (err) {
-               drop_inode = 1;
+       if (err)
+               goto out_unlock;
+
+       err = btrfs_update_inode(trans, root, inode);
+       if (err)
                goto out_unlock;
-       }
 
        /*
        * If the active LSM wants to access the inode during
@@ -5013,16 +5093,16 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
 
        err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
        if (err)
-               drop_inode = 1;
-       else {
-               inode->i_mapping->a_ops = &btrfs_aops;
-               inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
-               BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
-               d_instantiate(dentry, inode);
-       }
+               goto out_unlock;
+
+       inode->i_mapping->a_ops = &btrfs_aops;
+       inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+       BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+       d_instantiate(dentry, inode);
+
 out_unlock:
        btrfs_end_transaction(trans, root);
-       if (drop_inode) {
+       if (err && drop_inode_on_err) {
                inode_dec_link_count(inode);
                iput(inode);
        }
@@ -5066,6 +5146,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
        inode_inc_iversion(inode);
        inode->i_ctime = CURRENT_TIME;
        ihold(inode);
+       set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
 
        err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
 
@@ -5341,6 +5422,7 @@ again:
                if (start + len <= found_key.offset)
                        goto not_found;
                em->start = start;
+               em->orig_start = start;
                em->len = found_key.offset - start;
                goto not_found_em;
        }
@@ -5351,6 +5433,8 @@ again:
                em->len = extent_end - extent_start;
                em->orig_start = extent_start -
                                 btrfs_file_extent_offset(leaf, item);
+               em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf,
+                                                                     item);
                bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
                if (bytenr == 0) {
                        em->block_start = EXTENT_MAP_HOLE;
@@ -5360,8 +5444,7 @@ again:
                        set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
                        em->compress_type = compress_type;
                        em->block_start = bytenr;
-                       em->block_len = btrfs_file_extent_disk_num_bytes(leaf,
-                                                                        item);
+                       em->block_len = em->orig_block_len;
                } else {
                        bytenr += btrfs_file_extent_offset(leaf, item);
                        em->block_start = bytenr;
@@ -5391,7 +5474,8 @@ again:
                em->start = extent_start + extent_offset;
                em->len = (copy_size + root->sectorsize - 1) &
                        ~((u64)root->sectorsize - 1);
-               em->orig_start = EXTENT_MAP_INLINE;
+               em->orig_block_len = em->len;
+               em->orig_start = em->start;
                if (compress_type) {
                        set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
                        em->compress_type = compress_type;
@@ -5444,6 +5528,7 @@ again:
        }
 not_found:
        em->start = start;
+       em->orig_start = start;
        em->len = len;
 not_found_em:
        em->block_start = EXTENT_MAP_HOLE;
@@ -5539,10 +5624,13 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
                return em;
        if (em) {
                /*
-                * if our em maps to a hole, there might
-                * actually be delalloc bytes behind it
+                * if our em maps to
+                * -  a hole or
+                * -  a pre-alloc extent,
+                * there might actually be delalloc bytes behind it.
                 */
-               if (em->block_start != EXTENT_MAP_HOLE)
+               if (em->block_start != EXTENT_MAP_HOLE &&
+                   !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
                        return em;
                else
                        hole_em = em;
@@ -5624,6 +5712,8 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
                         */
                        em->block_start = hole_em->block_start;
                        em->block_len = hole_len;
+                       if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
+                               set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
                } else {
                        em->start = range_start;
                        em->len = found;
@@ -5645,38 +5735,19 @@ out:
 }
 
 static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
-                                                 struct extent_map *em,
                                                  u64 start, u64 len)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_trans_handle *trans;
-       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct extent_map *em;
        struct btrfs_key ins;
        u64 alloc_hint;
        int ret;
-       bool insert = false;
-
-       /*
-        * Ok if the extent map we looked up is a hole and is for the exact
-        * range we want, there is no reason to allocate a new one, however if
-        * it is not right then we need to free this one and drop the cache for
-        * our range.
-        */
-       if (em->block_start != EXTENT_MAP_HOLE || em->start != start ||
-           em->len != len) {
-               free_extent_map(em);
-               em = NULL;
-               insert = true;
-               btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
-       }
 
        trans = btrfs_join_transaction(root);
        if (IS_ERR(trans))
                return ERR_CAST(trans);
 
-       if (start <= BTRFS_I(inode)->disk_i_size && len < 64 * 1024)
-               btrfs_add_inode_defrag(trans, inode);
-
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
        alloc_hint = get_extent_allocation_hint(inode, start, len);
@@ -5687,37 +5758,10 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
                goto out;
        }
 
-       if (!em) {
-               em = alloc_extent_map();
-               if (!em) {
-                       em = ERR_PTR(-ENOMEM);
-                       goto out;
-               }
-       }
-
-       em->start = start;
-       em->orig_start = em->start;
-       em->len = ins.offset;
-
-       em->block_start = ins.objectid;
-       em->block_len = ins.offset;
-       em->bdev = root->fs_info->fs_devices->latest_bdev;
-
-       /*
-        * We need to do this because if we're using the original em we searched
-        * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that.
-        */
-       em->flags = 0;
-       set_bit(EXTENT_FLAG_PINNED, &em->flags);
-
-       while (insert) {
-               write_lock(&em_tree->lock);
-               ret = add_extent_mapping(em_tree, em);
-               write_unlock(&em_tree->lock);
-               if (ret != -EEXIST)
-                       break;
-               btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0);
-       }
+       em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
+                             ins.offset, ins.offset, 0);
+       if (IS_ERR(em))
+               goto out;
 
        ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
                                           ins.offset, ins.offset, 0);
@@ -5894,7 +5938,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
 static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
                                           u64 len, u64 orig_start,
                                           u64 block_start, u64 block_len,
-                                          int type)
+                                          u64 orig_block_len, int type)
 {
        struct extent_map_tree *em_tree;
        struct extent_map *em;
@@ -5912,15 +5956,20 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
        em->block_len = block_len;
        em->block_start = block_start;
        em->bdev = root->fs_info->fs_devices->latest_bdev;
+       em->orig_block_len = orig_block_len;
+       em->generation = -1;
        set_bit(EXTENT_FLAG_PINNED, &em->flags);
        if (type == BTRFS_ORDERED_PREALLOC)
-               set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
+               set_bit(EXTENT_FLAG_FILLING, &em->flags);
 
        do {
                btrfs_drop_extent_cache(inode, em->start,
                                em->start + em->len - 1, 0);
                write_lock(&em_tree->lock);
                ret = add_extent_mapping(em_tree, em);
+               if (!ret)
+                       list_move(&em->list,
+                                 &em_tree->modified_extents);
                write_unlock(&em_tree->lock);
        } while (ret == -EEXIST);
 
@@ -6047,13 +6096,15 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                        goto must_cow;
 
                if (can_nocow_odirect(trans, inode, start, len) == 1) {
-                       u64 orig_start = em->start;
+                       u64 orig_start = em->orig_start;
+                       u64 orig_block_len = em->orig_block_len;
 
                        if (type == BTRFS_ORDERED_PREALLOC) {
                                free_extent_map(em);
                                em = create_pinned_em(inode, start, len,
                                                       orig_start,
-                                                      block_start, len, type);
+                                                      block_start, len,
+                                                      orig_block_len, type);
                                if (IS_ERR(em)) {
                                        btrfs_end_transaction(trans, root);
                                        goto unlock_err;
@@ -6077,7 +6128,8 @@ must_cow:
         * it above
         */
        len = bh_result->b_size;
-       em = btrfs_new_extent_direct(inode, em, start, len);
+       free_extent_map(em);
+       em = btrfs_new_extent_direct(inode, start, len);
        if (IS_ERR(em)) {
                ret = PTR_ERR(em);
                goto unlock_err;
@@ -6318,6 +6370,9 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret;
 
+       if (async_submit)
+               async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
+
        bio_get(bio);
 
        if (!write) {
@@ -6582,9 +6637,17 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
                   btrfs_submit_direct, 0);
 }
 
+#define BTRFS_FIEMAP_FLAGS     (FIEMAP_FLAG_SYNC)
+
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                __u64 start, __u64 len)
 {
+       int     ret;
+
+       ret = fiemap_check_flags(fieinfo, BTRFS_FIEMAP_FLAGS);
+       if (ret)
+               return ret;
+
        return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
 }
 
@@ -6909,11 +6972,9 @@ static int btrfs_truncate(struct inode *inode)
 
        /*
         * 1 for the truncate slack space
-        * 1 for the orphan item we're going to add
-        * 1 for the orphan item deletion
         * 1 for updating the inode.
         */
-       trans = btrfs_start_transaction(root, 4);
+       trans = btrfs_start_transaction(root, 2);
        if (IS_ERR(trans)) {
                err = PTR_ERR(trans);
                goto out;
@@ -6924,12 +6985,6 @@ static int btrfs_truncate(struct inode *inode)
                                      min_size);
        BUG_ON(ret);
 
-       ret = btrfs_orphan_add(trans, inode);
-       if (ret) {
-               btrfs_end_transaction(trans, root);
-               goto out;
-       }
-
        /*
         * setattr is responsible for setting the ordered_data_close flag,
         * but that is only tested during the last file release.  That
@@ -6998,12 +7053,6 @@ static int btrfs_truncate(struct inode *inode)
                ret = btrfs_orphan_del(trans, inode);
                if (ret)
                        err = ret;
-       } else if (ret && inode->i_nlink > 0) {
-               /*
-                * Failed to do the truncate, remove us from the in memory
-                * orphan list.
-                */
-               ret = btrfs_orphan_del(NULL, inode);
        }
 
        if (trans) {
@@ -7090,6 +7139,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
        ei->io_tree.track_uptodate = 1;
        ei->io_failure_tree.track_uptodate = 1;
+       atomic_set(&ei->sync_writers, 0);
        mutex_init(&ei->log_mutex);
        mutex_init(&ei->delalloc_mutex);
        btrfs_ordered_inode_tree_init(&ei->ordered_tree);
@@ -7314,6 +7364,28 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (S_ISDIR(old_inode->i_mode) && new_inode &&
            new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
                return -ENOTEMPTY;
+
+
+       /* check for collisions, even if the name isn't there */
+       ret = btrfs_check_dir_item_collision(root, new_dir->i_ino,
+                            new_dentry->d_name.name,
+                            new_dentry->d_name.len);
+
+       if (ret) {
+               if (ret == -EEXIST) {
+                       /* we shouldn't get
+                        * -EEXIST without a new_inode */
+                       if (!new_inode) {
+                               WARN_ON(1);
+                               return ret;
+                       }
+               } else {
+                       /* maybe -EOVERFLOW */
+                       return ret;
+               }
+       }
+       ret = 0;
+
        /*
         * we're using rename to replace one file with another.
         * and the replacement file is large.  Start IO on it now so
@@ -7739,6 +7811,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
                em->len = ins.offset;
                em->block_start = ins.objectid;
                em->block_len = ins.offset;
+               em->orig_block_len = ins.offset;
                em->bdev = root->fs_info->fs_devices->latest_bdev;
                set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
                em->generation = trans->transid;