Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...

author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 9 Jul 2013 19:33:09 +0000 (12:33 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 9 Jul 2013 19:33:09 +0000 (12:33 -0700)
diff --combined fs/btrfs/delayed-inode.c

index eb34438ddedbc8ca0377fd6410d831fb3e824f7e,5615eacc7e7f11eaa7501ace2bfc397250f6f29c..375510913fe744784f8f56966ed29693ee8e3612
--- 1/fs/btrfs/delayed-inode.c
--- 2/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@@ -535,20 -535,6 +535,6 @@@ static struct btrfs_delayed_item *__btr
         return next;
   }
   
- static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root,
-                                                  u64 root_id)
- {
-       struct btrfs_key root_key;
- 
-       if (root->objectid == root_id)
-               return root;
- 
-       root_key.objectid = root_id;
-       root_key.type = BTRFS_ROOT_ITEM_KEY;
-       root_key.offset = (u64)-1;
-       return btrfs_read_fs_root_no_name(root->fs_info, &root_key);
- }
- 
   static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
                                                struct btrfs_root *root,
                                                struct btrfs_delayed_item *item)
@@@ -1681,7 -1667,8 +1667,7 @@@ int btrfs_should_delete_dir_index(struc
    * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
    *
    */
- -int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
- -                                  filldir_t filldir,
+ +int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
                                     struct list_head *ins_list)
   {
         struct btrfs_dir_item *di;
@@@ -1703,13 -1690,13 +1689,13 @@@
         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
                 list_del(&curr->readdir_list);
   
- -              if (curr->key.offset < filp->f_pos) {
+ +              if (curr->key.offset < ctx->pos) {
                         if (atomic_dec_and_test(&curr->refs))
                                 kfree(curr);
                         continue;
                 }
   
- -              filp->f_pos = curr->key.offset;
+ +              ctx->pos = curr->key.offset;
   
                 di = (struct btrfs_dir_item *)curr->data;
                 name = (char *)(di + 1);
@@@ -1718,7 -1705,7 +1704,7 @@@
                 d_type = btrfs_filetype_table[di->type];
                 btrfs_disk_key_to_cpu(&location, &di->location);
   
- -              over = filldir(dirent, name, name_len, curr->key.offset,
+ +              over = !dir_emit(ctx, name, name_len,
                                location.objectid, d_type);
   
                 if (atomic_dec_and_test(&curr->refs))
diff --combined fs/btrfs/disk-io.c

index b0292b3ead54d1651ba47d7e9efcc567566dd1ed,3c2886ca7d8cbac3ffe353a0280a1831e53ec2ff..6b092a1c4e37bab47adb0e9fc35ae6ec3e6081f8
--- 1/fs/btrfs/disk-io.c
--- 2/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@@ -1013,8 -1013,7 +1013,8 @@@ static int btree_releasepage(struct pag
         return try_release_extent_buffer(page);
   }
   
- -static void btree_invalidatepage(struct page *page, unsigned long offset)
+ +static void btree_invalidatepage(struct page *page, unsigned int offset,
+ +                               unsigned int length)
   {
         struct extent_io_tree *tree;
         tree = &BTRFS_I(page->mapping->host)->io_tree;
@@@ -1192,6 -1191,8 +1192,8 @@@ static void __setup_root(u32 nodesize, 
         root->objectid = objectid;
         root->last_trans = 0;
         root->highest_objectid = 0;
+       root->nr_delalloc_inodes = 0;
+       root->nr_ordered_extents = 0;
         root->name = NULL;
         root->inode_tree = RB_ROOT;
         INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
@@@ -1200,10 -1201,16 +1202,16 @@@
   
         INIT_LIST_HEAD(&root->dirty_list);
         INIT_LIST_HEAD(&root->root_list);
+       INIT_LIST_HEAD(&root->delalloc_inodes);
+       INIT_LIST_HEAD(&root->delalloc_root);
+       INIT_LIST_HEAD(&root->ordered_extents);
+       INIT_LIST_HEAD(&root->ordered_root);
         INIT_LIST_HEAD(&root->logged_list[0]);
         INIT_LIST_HEAD(&root->logged_list[1]);
         spin_lock_init(&root->orphan_lock);
         spin_lock_init(&root->inode_lock);
+       spin_lock_init(&root->delalloc_lock);
+       spin_lock_init(&root->ordered_extent_lock);
         spin_lock_init(&root->accounting_lock);
         spin_lock_init(&root->log_extents_lock[0]);
         spin_lock_init(&root->log_extents_lock[1]);
@@@ -1217,6 -1224,7 +1225,7 @@@
         atomic_set(&root->log_writers, 0);
         atomic_set(&root->log_batch, 0);
         atomic_set(&root->orphan_inodes, 0);
+       atomic_set(&root->refs, 1);
         root->log_transid = 0;
         root->last_log_commit = 0;
         extent_io_tree_init(&root->dirty_log_pages,
@@@ -1235,39 -1243,6 +1244,6 @@@
         spin_lock_init(&root->root_item_lock);
   }
   
- static int __must_check find_and_setup_root(struct btrfs_root *tree_root,
-                                           struct btrfs_fs_info *fs_info,
-                                           u64 objectid,
-                                           struct btrfs_root *root)
- {
-       int ret;
-       u32 blocksize;
-       u64 generation;
- 
-       __setup_root(tree_root->nodesize, tree_root->leafsize,
-                    tree_root->sectorsize, tree_root->stripesize,
-                    root, fs_info, objectid);
-       ret = btrfs_find_last_root(tree_root, objectid,
-                                  &root->root_item, &root->root_key);
-       if (ret > 0)
-               return -ENOENT;
-       else if (ret < 0)
-               return ret;
- 
-       generation = btrfs_root_generation(&root->root_item);
-       blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
-       root->commit_root = NULL;
-       root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
-                                    blocksize, generation);
-       if (!root->node || !btrfs_buffer_uptodate(root->node, generation, 0)) {
-               free_extent_buffer(root->node);
-               root->node = NULL;
-               return -EIO;
-       }
-       root->commit_root = btrfs_root_node(root);
-       return 0;
- }
- 
   static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info)
   {
         struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS);
@@@ -1452,70 -1427,73 +1428,73 @@@ int btrfs_add_log_tree(struct btrfs_tra
         return 0;
   }
   
- struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
-                                              struct btrfs_key *location)
+ struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
+                                       struct btrfs_key *key)
   {
         struct btrfs_root *root;
         struct btrfs_fs_info *fs_info = tree_root->fs_info;
         struct btrfs_path *path;
-       struct extent_buffer *l;
         u64 generation;
         u32 blocksize;
-       int ret = 0;
-       int slot;
+       int ret;
   
-       root = btrfs_alloc_root(fs_info);
-       if (!root)
+       path = btrfs_alloc_path();
+       if (!path)
                 return ERR_PTR(-ENOMEM);
-       if (location->offset == (u64)-1) {
-               ret = find_and_setup_root(tree_root, fs_info,
-                                         location->objectid, root);
-               if (ret) {
-                       kfree(root);
-                       return ERR_PTR(ret);
-               }
-               goto out;
+ 
+       root = btrfs_alloc_root(fs_info);
+       if (!root) {
+               ret = -ENOMEM;
+               goto alloc_fail;
         }
   
         __setup_root(tree_root->nodesize, tree_root->leafsize,
                      tree_root->sectorsize, tree_root->stripesize,
-                    root, fs_info, location->objectid);
+                    root, fs_info, key->objectid);
   
-       path = btrfs_alloc_path();
-       if (!path) {
-               kfree(root);
-               return ERR_PTR(-ENOMEM);
-       }
-       ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
-       if (ret == 0) {
-               l = path->nodes[0];
-               slot = path->slots[0];
-               btrfs_read_root_item(l, slot, &root->root_item);
-               memcpy(&root->root_key, location, sizeof(*location));
-       }
-       btrfs_free_path(path);
+       ret = btrfs_find_root(tree_root, key, path,
+                             &root->root_item, &root->root_key);
         if (ret) {
-               kfree(root);
                 if (ret > 0)
                         ret = -ENOENT;
-               return ERR_PTR(ret);
+               goto find_fail;
         }
   
         generation = btrfs_root_generation(&root->root_item);
         blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
         root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
                                      blocksize, generation);
-       if (!root->node || !extent_buffer_uptodate(root->node)) {
-               ret = (!root->node) ? -ENOMEM : -EIO;
- 
-               free_extent_buffer(root->node);
-               kfree(root);
-               return ERR_PTR(ret);
+       if (!root->node) {
+               ret = -ENOMEM;
+               goto find_fail;
+       } else if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
+               ret = -EIO;
+               goto read_fail;
         }
- 
         root->commit_root = btrfs_root_node(root);
   out:
-       if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
+       btrfs_free_path(path);
+       return root;
+ 
+ read_fail:
+       free_extent_buffer(root->node);
+ find_fail:
+       kfree(root);
+ alloc_fail:
+       root = ERR_PTR(ret);
+       goto out;
+ }
+ 
+ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
+                                     struct btrfs_key *location)
+ {
+       struct btrfs_root *root;
+ 
+       root = btrfs_read_tree_root(tree_root, location);
+       if (IS_ERR(root))
+               return root;
+ 
+       if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
                 root->ref_cows = 1;
                 btrfs_check_and_init_root_item(&root->root_item);
         }
@@@ -1523,6 -1501,66 +1502,66 @@@
         return root;
   }
   
+ int btrfs_init_fs_root(struct btrfs_root *root)
+ {
+       int ret;
+ 
+       root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
+       root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
+                                       GFP_NOFS);
+       if (!root->free_ino_pinned || !root->free_ino_ctl) {
+               ret = -ENOMEM;
+               goto fail;
+       }
+ 
+       btrfs_init_free_ino_ctl(root);
+       mutex_init(&root->fs_commit_mutex);
+       spin_lock_init(&root->cache_lock);
+       init_waitqueue_head(&root->cache_wait);
+ 
+       ret = get_anon_bdev(&root->anon_dev);
+       if (ret)
+               goto fail;
+       return 0;
+ fail:
+       kfree(root->free_ino_ctl);
+       kfree(root->free_ino_pinned);
+       return ret;
+ }
+ 
+ struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+                                       u64 root_id)
+ {
+       struct btrfs_root *root;
+ 
+       spin_lock(&fs_info->fs_roots_radix_lock);
+       root = radix_tree_lookup(&fs_info->fs_roots_radix,
+                                (unsigned long)root_id);
+       spin_unlock(&fs_info->fs_roots_radix_lock);
+       return root;
+ }
+ 
+ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
+                        struct btrfs_root *root)
+ {
+       int ret;
+ 
+       ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+       if (ret)
+               return ret;
+ 
+       spin_lock(&fs_info->fs_roots_radix_lock);
+       ret = radix_tree_insert(&fs_info->fs_roots_radix,
+                               (unsigned long)root->root_key.objectid,
+                               root);
+       if (ret == 0)
+               root->in_radix = 1;
+       spin_unlock(&fs_info->fs_roots_radix_lock);
+       radix_tree_preload_end();
+ 
+       return ret;
+ }
+ 
   struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
                                               struct btrfs_key *location)
   {
@@@ -1543,58 -1581,30 +1582,30 @@@
                 return fs_info->quota_root ? fs_info->quota_root :
                                              ERR_PTR(-ENOENT);
   again:
-       spin_lock(&fs_info->fs_roots_radix_lock);
-       root = radix_tree_lookup(&fs_info->fs_roots_radix,
-                                (unsigned long)location->objectid);
-       spin_unlock(&fs_info->fs_roots_radix_lock);
+       root = btrfs_lookup_fs_root(fs_info, location->objectid);
         if (root)
                 return root;
   
-       root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location);
+       root = btrfs_read_fs_root(fs_info->tree_root, location);
         if (IS_ERR(root))
                 return root;
   
-       root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
-       root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
-                                       GFP_NOFS);
-       if (!root->free_ino_pinned || !root->free_ino_ctl) {
-               ret = -ENOMEM;
+       if (btrfs_root_refs(&root->root_item) == 0) {
+               ret = -ENOENT;
                 goto fail;
         }
   
-       btrfs_init_free_ino_ctl(root);
-       mutex_init(&root->fs_commit_mutex);
-       spin_lock_init(&root->cache_lock);
-       init_waitqueue_head(&root->cache_wait);
- 
-       ret = get_anon_bdev(&root->anon_dev);
+       ret = btrfs_init_fs_root(root);
         if (ret)
                 goto fail;
   
-       if (btrfs_root_refs(&root->root_item) == 0) {
-               ret = -ENOENT;
-               goto fail;
-       }
- 
         ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid);
         if (ret < 0)
                 goto fail;
         if (ret == 0)
                 root->orphan_item_inserted = 1;
   
-       ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
-       if (ret)
-               goto fail;
- 
-       spin_lock(&fs_info->fs_roots_radix_lock);
-       ret = radix_tree_insert(&fs_info->fs_roots_radix,
-                               (unsigned long)root->root_key.objectid,
-                               root);
-       if (ret == 0)
-               root->in_radix = 1;
- 
-       spin_unlock(&fs_info->fs_roots_radix_lock);
-       radix_tree_preload_end();
+       ret = btrfs_insert_fs_root(fs_info, root);
         if (ret) {
                 if (ret == -EEXIST) {
                         free_fs_root(root);
@@@ -1602,10 -1612,6 +1613,6 @@@
                 }
                 goto fail;
         }
- 
-       ret = btrfs_find_dead_roots(fs_info->tree_root,
-                                   root->root_key.objectid);
-       WARN_ON(ret);
         return root;
   fail:
         free_fs_root(root);
@@@ -1677,21 -1683,37 +1684,37 @@@ static void end_workqueue_fn(struct btr
   static int cleaner_kthread(void *arg)
   {
         struct btrfs_root *root = arg;
+       int again;
   
         do {
-               int again = 0;
- 
-               if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
-                   down_read_trylock(&root->fs_info->sb->s_umount)) {
-                       if (mutex_trylock(&root->fs_info->cleaner_mutex)) {
-                               btrfs_run_delayed_iputs(root);
-                               again = btrfs_clean_one_deleted_snapshot(root);
-                               mutex_unlock(&root->fs_info->cleaner_mutex);
-                       }
-                       btrfs_run_defrag_inodes(root->fs_info);
-                       up_read(&root->fs_info->sb->s_umount);
+               again = 0;
+ 
+               /* Make the cleaner go to sleep early. */
+               if (btrfs_need_cleaner_sleep(root))
+                       goto sleep;
+ 
+               if (!mutex_trylock(&root->fs_info->cleaner_mutex))
+                       goto sleep;
+ 
+               /*
+                * Avoid the problem that we change the status of the fs
+                * during the above check and trylock.
+                */
+               if (btrfs_need_cleaner_sleep(root)) {
+                       mutex_unlock(&root->fs_info->cleaner_mutex);
+                       goto sleep;
                 }
   
+               btrfs_run_delayed_iputs(root);
+               again = btrfs_clean_one_deleted_snapshot(root);
+               mutex_unlock(&root->fs_info->cleaner_mutex);
+ 
+               /*
+                * The defragger has dealt with the R/O remount and umount,
+                * needn't do anything special here.
+                */
+               btrfs_run_defrag_inodes(root->fs_info);
+ sleep:
                 if (!try_to_freeze() && !again) {
                         set_current_state(TASK_INTERRUPTIBLE);
                         if (!kthread_should_stop())
@@@ -1725,7 -1747,7 +1748,7 @@@ static int transaction_kthread(void *ar
                 }
   
                 now = get_seconds();
-               if (!cur->blocked &&
+               if (cur->state < TRANS_STATE_BLOCKED &&
                     (now < cur->start_time || now - cur->start_time < 30)) {
                         spin_unlock(&root->fs_info->trans_lock);
                         delay = HZ * 5;
@@@ -2035,11 -2057,11 +2058,11 @@@ static void del_fs_roots(struct btrfs_f
                 list_del(&gang[0]->root_list);
   
                 if (gang[0]->in_radix) {
-                       btrfs_free_fs_root(fs_info, gang[0]);
+                       btrfs_drop_and_free_fs_root(fs_info, gang[0]);
                 } else {
                         free_extent_buffer(gang[0]->node);
                         free_extent_buffer(gang[0]->commit_root);
-                       kfree(gang[0]);
+                       btrfs_put_fs_root(gang[0]);
                 }
         }
   
@@@ -2050,7 -2072,7 +2073,7 @@@
                 if (!ret)
                         break;
                 for (i = 0; i < ret; i++)
-                       btrfs_free_fs_root(fs_info, gang[i]);
+                       btrfs_drop_and_free_fs_root(fs_info, gang[i]);
         }
   }
   
@@@ -2082,14 -2104,8 +2105,8 @@@ int open_ctree(struct super_block *sb
         int backup_index = 0;
   
         tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info);
-       extent_root = fs_info->extent_root = btrfs_alloc_root(fs_info);
-       csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info);
         chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
-       dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info);
-       quota_root = fs_info->quota_root = btrfs_alloc_root(fs_info);
- 
-       if (!tree_root || !extent_root || !csum_root ||
-           !chunk_root || !dev_root || !quota_root) {
+       if (!tree_root || !chunk_root) {
                 err = -ENOMEM;
                 goto fail;
         }
@@@ -2132,9 -2148,9 +2149,9 @@@
         INIT_LIST_HEAD(&fs_info->trans_list);
         INIT_LIST_HEAD(&fs_info->dead_roots);
         INIT_LIST_HEAD(&fs_info->delayed_iputs);
-       INIT_LIST_HEAD(&fs_info->delalloc_inodes);
+       INIT_LIST_HEAD(&fs_info->delalloc_roots);
         INIT_LIST_HEAD(&fs_info->caching_block_groups);
-       spin_lock_init(&fs_info->delalloc_lock);
+       spin_lock_init(&fs_info->delalloc_root_lock);
         spin_lock_init(&fs_info->trans_lock);
         spin_lock_init(&fs_info->fs_roots_radix_lock);
         spin_lock_init(&fs_info->delayed_iput_lock);
@@@ -2170,7 -2186,6 +2187,6 @@@
         fs_info->max_inline = 8192 * 1024;
         fs_info->metadata_ratio = 0;
         fs_info->defrag_inodes = RB_ROOT;
-       fs_info->trans_no_join = 0;
         fs_info->free_chunk_space = 0;
         fs_info->tree_mod_log = RB_ROOT;
   
@@@ -2181,8 -2196,8 +2197,8 @@@
         fs_info->thread_pool_size = min_t(unsigned long,
                                           num_online_cpus() + 2, 8);
   
-       INIT_LIST_HEAD(&fs_info->ordered_extents);
-       spin_lock_init(&fs_info->ordered_extent_lock);
+       INIT_LIST_HEAD(&fs_info->ordered_roots);
+       spin_lock_init(&fs_info->ordered_root_lock);
         fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
                                         GFP_NOFS);
         if (!fs_info->delayed_root) {
@@@ -2275,6 -2290,7 +2291,7 @@@
         fs_info->qgroup_seq = 1;
         fs_info->quota_enabled = 0;
         fs_info->pending_quota_state = 0;
+       fs_info->qgroup_ulist = NULL;
         mutex_init(&fs_info->qgroup_rescan_lock);
   
         btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
@@@ -2639,33 -2655,44 +2656,44 @@@ retry_root_backup
         btrfs_set_root_node(&tree_root->root_item, tree_root->node);
         tree_root->commit_root = btrfs_root_node(tree_root);
   
-       ret = find_and_setup_root(tree_root, fs_info,
-                                 BTRFS_EXTENT_TREE_OBJECTID, extent_root);
-       if (ret)
+       location.objectid = BTRFS_EXTENT_TREE_OBJECTID;
+       location.type = BTRFS_ROOT_ITEM_KEY;
+       location.offset = 0;
+ 
+       extent_root = btrfs_read_tree_root(tree_root, &location);
+       if (IS_ERR(extent_root)) {
+               ret = PTR_ERR(extent_root);
                 goto recovery_tree_root;
+       }
         extent_root->track_dirty = 1;
+       fs_info->extent_root = extent_root;
   
-       ret = find_and_setup_root(tree_root, fs_info,
-                                 BTRFS_DEV_TREE_OBJECTID, dev_root);
-       if (ret)
+       location.objectid = BTRFS_DEV_TREE_OBJECTID;
+       dev_root = btrfs_read_tree_root(tree_root, &location);
+       if (IS_ERR(dev_root)) {
+               ret = PTR_ERR(dev_root);
                 goto recovery_tree_root;
+       }
         dev_root->track_dirty = 1;
+       fs_info->dev_root = dev_root;
+       btrfs_init_devices_late(fs_info);
   
-       ret = find_and_setup_root(tree_root, fs_info,
-                                 BTRFS_CSUM_TREE_OBJECTID, csum_root);
-       if (ret)
+       location.objectid = BTRFS_CSUM_TREE_OBJECTID;
+       csum_root = btrfs_read_tree_root(tree_root, &location);
+       if (IS_ERR(csum_root)) {
+               ret = PTR_ERR(csum_root);
                 goto recovery_tree_root;
+       }
         csum_root->track_dirty = 1;
+       fs_info->csum_root = csum_root;
   
-       ret = find_and_setup_root(tree_root, fs_info,
-                                 BTRFS_QUOTA_TREE_OBJECTID, quota_root);
-       if (ret) {
-               kfree(quota_root);
-               quota_root = fs_info->quota_root = NULL;
-       } else {
+       location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
+       quota_root = btrfs_read_tree_root(tree_root, &location);
+       if (!IS_ERR(quota_root)) {
                 quota_root->track_dirty = 1;
                 fs_info->quota_enabled = 1;
                 fs_info->pending_quota_state = 1;
+               fs_info->quota_root = quota_root;
         }
   
         fs_info->generation = generation;
@@@ -2818,11 -2845,9 +2846,9 @@@
   
         location.objectid = BTRFS_FS_TREE_OBJECTID;
         location.type = BTRFS_ROOT_ITEM_KEY;
-       location.offset = (u64)-1;
+       location.offset = 0;
   
         fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
-       if (!fs_info->fs_root)
-               goto fail_qgroup;
         if (IS_ERR(fs_info->fs_root)) {
                 err = PTR_ERR(fs_info->fs_root);
                 goto fail_qgroup;
@@@ -2854,6 -2879,8 +2880,8 @@@
                 return ret;
         }
   
+       btrfs_qgroup_rescan_resume(fs_info);
+ 
         return 0;
   
   fail_qgroup:
@@@ -3259,7 -3286,7 +3287,7 @@@ int btrfs_calc_num_tolerated_disk_barri
                                             BTRFS_BLOCK_GROUP_RAID10)) {
                                                 num_tolerated_disk_barrier_failures = 1;
                                         } else if (flags &
-                                                  BTRFS_BLOCK_GROUP_RAID5) {
+                                                  BTRFS_BLOCK_GROUP_RAID6) {
                                                 num_tolerated_disk_barrier_failures = 2;
                                         }
                                 }
@@@ -3367,7 -3394,9 +3395,9 @@@ int write_ctree_super(struct btrfs_tran
         return ret;
   }
   
- void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
+ /* Drop a fs root from the radix tree and free it. */
+ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
+                                 struct btrfs_root *root)
   {
         spin_lock(&fs_info->fs_roots_radix_lock);
         radix_tree_delete(&fs_info->fs_roots_radix,
@@@ -3398,7 -3427,12 +3428,12 @@@ static void free_fs_root(struct btrfs_r
         kfree(root->free_ino_ctl);
         kfree(root->free_ino_pinned);
         kfree(root->name);
-       kfree(root);
+       btrfs_put_fs_root(root);
+ }
+ 
+ void btrfs_free_fs_root(struct btrfs_root *root)
+ {
+       free_fs_root(root);
   }
   
   int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
@@@ -3654,7 -3688,7 +3689,7 @@@ static void btrfs_destroy_ordered_opera
         INIT_LIST_HEAD(&splice);
   
         mutex_lock(&root->fs_info->ordered_operations_mutex);
-       spin_lock(&root->fs_info->ordered_extent_lock);
+       spin_lock(&root->fs_info->ordered_root_lock);
   
         list_splice_init(&t->ordered_operations, &splice);
         while (!list_empty(&splice)) {
@@@ -3662,14 -3696,14 +3697,14 @@@
                                          ordered_operations);
   
                 list_del_init(&btrfs_inode->ordered_operations);
-               spin_unlock(&root->fs_info->ordered_extent_lock);
+               spin_unlock(&root->fs_info->ordered_root_lock);
   
                 btrfs_invalidate_inodes(btrfs_inode->root);
   
-               spin_lock(&root->fs_info->ordered_extent_lock);
+               spin_lock(&root->fs_info->ordered_root_lock);
         }
   
-       spin_unlock(&root->fs_info->ordered_extent_lock);
+       spin_unlock(&root->fs_info->ordered_root_lock);
         mutex_unlock(&root->fs_info->ordered_operations_mutex);
   }
   
@@@ -3677,15 -3711,36 +3712,36 @@@ static void btrfs_destroy_ordered_exten
   {
         struct btrfs_ordered_extent *ordered;
   
-       spin_lock(&root->fs_info->ordered_extent_lock);
+       spin_lock(&root->ordered_extent_lock);
         /*
          * This will just short circuit the ordered completion stuff which will
          * make sure the ordered extent gets properly cleaned up.
          */
-       list_for_each_entry(ordered, &root->fs_info->ordered_extents,
+       list_for_each_entry(ordered, &root->ordered_extents,
                             root_extent_list)
                 set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
-       spin_unlock(&root->fs_info->ordered_extent_lock);
+       spin_unlock(&root->ordered_extent_lock);
+ }
+ 
+ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
+ {
+       struct btrfs_root *root;
+       struct list_head splice;
+ 
+       INIT_LIST_HEAD(&splice);
+ 
+       spin_lock(&fs_info->ordered_root_lock);
+       list_splice_init(&fs_info->ordered_roots, &splice);
+       while (!list_empty(&splice)) {
+               root = list_first_entry(&splice, struct btrfs_root,
+                                       ordered_root);
+               list_del_init(&root->ordered_root);
+ 
+               btrfs_destroy_ordered_extents(root);
+ 
+               cond_resched_lock(&fs_info->ordered_root_lock);
+       }
+       spin_unlock(&fs_info->ordered_root_lock);
   }
   
   int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
@@@ -3707,6 -3762,7 +3763,7 @@@
   
         while ((node = rb_first(&delayed_refs->root)) != NULL) {
                 struct btrfs_delayed_ref_head *head = NULL;
+               bool pin_bytes = false;
   
                 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
                 atomic_set(&ref->refs, 1);
@@@ -3727,8 -3783,7 +3784,7 @@@
                         }
   
                         if (head->must_insert_reserved)
-                               btrfs_pin_extent(root, ref->bytenr,
-                                                ref->num_bytes, 1);
+                               pin_bytes = true;
                         btrfs_free_delayed_extent_op(head->extent_op);
                         delayed_refs->num_heads--;
                         if (list_empty(&head->cluster))
@@@ -3739,9 -3794,13 +3795,13 @@@
                 ref->in_tree = 0;
                 rb_erase(&ref->rb_node, &delayed_refs->root);
                 delayed_refs->num_entries--;
-               if (head)
-                       mutex_unlock(&head->mutex);
                 spin_unlock(&delayed_refs->lock);
+               if (head) {
+                       if (pin_bytes)
+                               btrfs_pin_extent(root, ref->bytenr,
+                                                ref->num_bytes, 1);
+                       mutex_unlock(&head->mutex);
+               }
                 btrfs_put_delayed_ref(ref);
   
                 cond_resched();
@@@ -3778,24 -3837,49 +3838,49 @@@ static void btrfs_destroy_delalloc_inod
   
         INIT_LIST_HEAD(&splice);
   
-       spin_lock(&root->fs_info->delalloc_lock);
-       list_splice_init(&root->fs_info->delalloc_inodes, &splice);
+       spin_lock(&root->delalloc_lock);
+       list_splice_init(&root->delalloc_inodes, &splice);
   
         while (!list_empty(&splice)) {
-               btrfs_inode = list_entry(splice.next, struct btrfs_inode,
-                                   delalloc_inodes);
+               btrfs_inode = list_first_entry(&splice, struct btrfs_inode,
+                                              delalloc_inodes);
   
                 list_del_init(&btrfs_inode->delalloc_inodes);
                 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
                           &btrfs_inode->runtime_flags);
-               spin_unlock(&root->fs_info->delalloc_lock);
+               spin_unlock(&root->delalloc_lock);
   
                 btrfs_invalidate_inodes(btrfs_inode->root);
   
-               spin_lock(&root->fs_info->delalloc_lock);
+               spin_lock(&root->delalloc_lock);
         }
   
-       spin_unlock(&root->fs_info->delalloc_lock);
+       spin_unlock(&root->delalloc_lock);
+ }
+ 
+ static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info)
+ {
+       struct btrfs_root *root;
+       struct list_head splice;
+ 
+       INIT_LIST_HEAD(&splice);
+ 
+       spin_lock(&fs_info->delalloc_root_lock);
+       list_splice_init(&fs_info->delalloc_roots, &splice);
+       while (!list_empty(&splice)) {
+               root = list_first_entry(&splice, struct btrfs_root,
+                                        delalloc_root);
+               list_del_init(&root->delalloc_root);
+               root = btrfs_grab_fs_root(root);
+               BUG_ON(!root);
+               spin_unlock(&fs_info->delalloc_root_lock);
+ 
+               btrfs_destroy_delalloc_inodes(root);
+               btrfs_put_fs_root(root);
+ 
+               spin_lock(&fs_info->delalloc_root_lock);
+       }
+       spin_unlock(&fs_info->delalloc_root_lock);
   }
   
   static int btrfs_destroy_marked_extents(struct btrfs_root *root,
@@@ -3879,19 -3963,14 +3964,14 @@@ void btrfs_cleanup_one_transaction(stru
         btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
                                 cur_trans->dirty_pages.dirty_bytes);
   
-       /* FIXME: cleanup wait for commit */
-       cur_trans->in_commit = 1;
-       cur_trans->blocked = 1;
+       cur_trans->state = TRANS_STATE_COMMIT_START;
         wake_up(&root->fs_info->transaction_blocked_wait);
   
         btrfs_evict_pending_snapshots(cur_trans);
   
-       cur_trans->blocked = 0;
+       cur_trans->state = TRANS_STATE_UNBLOCKED;
         wake_up(&root->fs_info->transaction_wait);
   
-       cur_trans->commit_done = 1;
-       wake_up(&cur_trans->commit_wait);
- 
         btrfs_destroy_delayed_inodes(root);
         btrfs_assert_delayed_root_empty(root);
   
@@@ -3900,6 -3979,9 +3980,9 @@@
         btrfs_destroy_pinned_extent(root,
                                     root->fs_info->pinned_extents);
   
+       cur_trans->state =TRANS_STATE_COMPLETED;
+       wake_up(&cur_trans->commit_wait);
+ 
         /*
         memset(cur_trans, 0, sizeof(*cur_trans));
         kmem_cache_free(btrfs_transaction_cachep, cur_trans);
@@@ -3915,7 -3997,7 +3998,7 @@@ static int btrfs_cleanup_transaction(st
   
         spin_lock(&root->fs_info->trans_lock);
         list_splice_init(&root->fs_info->trans_list, &list);
-       root->fs_info->trans_no_join = 1;
+       root->fs_info->running_transaction = NULL;
         spin_unlock(&root->fs_info->trans_lock);
   
         while (!list_empty(&list)) {
@@@ -3923,37 -4005,31 +4006,31 @@@
   
                 btrfs_destroy_ordered_operations(t, root);
   
-               btrfs_destroy_ordered_extents(root);
+               btrfs_destroy_all_ordered_extents(root->fs_info);
   
                 btrfs_destroy_delayed_refs(t, root);
   
-               /* FIXME: cleanup wait for commit */
-               t->in_commit = 1;
-               t->blocked = 1;
+               /*
+                *  FIXME: cleanup wait for commit
+                *  We needn't acquire the lock here: we are in the middle of
+                *  umount, so there is no other task which will change it.
+                */
+               t->state = TRANS_STATE_COMMIT_START;
                 smp_mb();
                 if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
                         wake_up(&root->fs_info->transaction_blocked_wait);
   
                 btrfs_evict_pending_snapshots(t);
   
-               t->blocked = 0;
+               t->state = TRANS_STATE_UNBLOCKED;
                 smp_mb();
                 if (waitqueue_active(&root->fs_info->transaction_wait))
                         wake_up(&root->fs_info->transaction_wait);
   
-               t->commit_done = 1;
-               smp_mb();
-               if (waitqueue_active(&t->commit_wait))
-                       wake_up(&t->commit_wait);
- 
                 btrfs_destroy_delayed_inodes(root);
                 btrfs_assert_delayed_root_empty(root);
   
-               btrfs_destroy_delalloc_inodes(root);
- 
-               spin_lock(&root->fs_info->trans_lock);
-               root->fs_info->running_transaction = NULL;
-               spin_unlock(&root->fs_info->trans_lock);
+               btrfs_destroy_all_delalloc_inodes(root->fs_info);
   
                 btrfs_destroy_marked_extents(root, &t->dirty_pages,
                                              EXTENT_DIRTY);
@@@ -3961,15 -4037,17 +4038,17 @@@
                 btrfs_destroy_pinned_extent(root,
                                             root->fs_info->pinned_extents);
   
+               t->state = TRANS_STATE_COMPLETED;
+               smp_mb();
+               if (waitqueue_active(&t->commit_wait))
+                       wake_up(&t->commit_wait);
+ 
                 atomic_set(&t->use_count, 0);
                 list_del_init(&t->list);
                 memset(t, 0, sizeof(*t));
                 kmem_cache_free(btrfs_transaction_cachep, t);
         }
   
-       spin_lock(&root->fs_info->trans_lock);
-       root->fs_info->trans_no_join = 0;
-       spin_unlock(&root->fs_info->trans_lock);
         mutex_unlock(&root->fs_info->transaction_kthread_mutex);
   
         return 0;
diff --combined fs/btrfs/extent_io.c

index 6bca9472f313cda2cb7ad1f230dda69bf4b1e8a9,f8586a957a020cc62591ddee1489d6f379308c89..583d98bd065ed83ca979a2786b59ae4342380c47
--- 1/fs/btrfs/extent_io.c
--- 2/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@@ -77,10 -77,29 +77,29 @@@ void btrfs_leak_debug_check(void
                 kmem_cache_free(extent_buffer_cache, eb);
         }
   }
+ 
+ #define btrfs_debug_check_extent_io_range(inode, start, end)          \
+       __btrfs_debug_check_extent_io_range(__func__, (inode), (start), (end))
+ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
+               struct inode *inode, u64 start, u64 end)
+ {
+       u64 isize = i_size_read(inode);
+ 
+       if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
+               printk_ratelimited(KERN_DEBUG
+                   "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
+                               caller,
+                               (unsigned long long)btrfs_ino(inode),
+                               (unsigned long long)isize,
+                               (unsigned long long)start,
+                               (unsigned long long)end);
+       }
+ }
   #else
   #define btrfs_leak_debug_add(new, head)       do {} while (0)
   #define btrfs_leak_debug_del(entry)   do {} while (0)
   #define btrfs_leak_debug_check()      do {} while (0)
+ #define btrfs_debug_check_extent_io_range(c, s, e)    do {} while (0)
   #endif
   
   #define BUFFER_LRU_MAX 64
@@@ -522,6 -541,11 +541,11 @@@ int clear_extent_bit(struct extent_io_t
         int err;
         int clear = 0;
   
+       btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+ 
+       if (bits & EXTENT_DELALLOC)
+               bits |= EXTENT_NORESERVE;
+ 
         if (delete)
                 bits |= ~EXTENT_CTLBITS;
         bits |= EXTENT_FIRST_DELALLOC;
@@@ -677,6 -701,8 +701,8 @@@ static void wait_extent_bit(struct exte
         struct extent_state *state;
         struct rb_node *node;
   
+       btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+ 
         spin_lock(&tree->lock);
   again:
         while (1) {
@@@ -769,6 -795,8 +795,8 @@@ __set_extent_bit(struct extent_io_tree 
         u64 last_start;
         u64 last_end;
   
+       btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+ 
         bits |= EXTENT_FIRST_DELALLOC;
   again:
         if (!prealloc && (mask & __GFP_WAIT)) {
@@@ -989,6 -1017,8 +1017,8 @@@ int convert_extent_bit(struct extent_io
         u64 last_start;
         u64 last_end;
   
+       btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+ 
   again:
         if (!prealloc && (mask & __GFP_WAIT)) {
                 prealloc = alloc_extent_state(mask);
@@@ -2450,11 -2480,12 +2480,12 @@@ static void end_bio_extent_readpage(str
                 struct extent_state *cached = NULL;
                 struct extent_state *state;
                 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
+               struct inode *inode = page->mapping->host;
   
                 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
                          "mirror=%lu\n", (u64)bio->bi_sector, err,
                          io_bio->mirror_num);
-               tree = &BTRFS_I(page->mapping->host)->io_tree;
+               tree = &BTRFS_I(inode)->io_tree;
   
                 /* We always issue full-page reads, but if some block
                  * in a page fails to read, blk_update_request() will
@@@ -2528,6 -2559,14 +2559,14 @@@
                 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
   
                 if (uptodate) {
+                       loff_t i_size = i_size_read(inode);
+                       pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+                       unsigned offset;
+ 
+                       /* Zero out the end if this page straddles i_size */
+                       offset = i_size & (PAGE_CACHE_SIZE-1);
+                       if (page->index == end_index && offset)
+                               zero_user_segment(page, offset, PAGE_CACHE_SIZE);
                         SetPageUptodate(page);
                 } else {
                         ClearPageUptodate(page);
@@@ -2643,7 -2682,8 +2682,7 @@@ static int submit_extent_page(int rw, s
                 if (old_compressed)
                         contig = bio->bi_sector == sector;
                 else
- -                      contig = bio->bi_sector + (bio->bi_size >> 9) ==
- -                              sector;
+ +                      contig = bio_end_sector(bio) == sector;
   
                 if (prev_bio_flags != bio_flags || !contig ||
                     merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
@@@ -2957,7 -2997,7 +2996,7 @@@ static int __extent_writepage(struct pa
         pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
         if (page->index > end_index ||
            (page->index == end_index && !pg_offset)) {
- -              page->mapping->a_ops->invalidatepage(page, 0);
+ +              page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
                 unlock_page(page);
                 return 0;
         }
diff --combined fs/btrfs/file.c

index 89da56a58b635c9bf80197c0cf32e2dc3f698442,2d70849cec92b714476657d5a75441a47c4812d5..a005fe2c072ad0751254adba0fa4e04db10cc996
--- 1/fs/btrfs/file.c
--- 2/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@@ -24,7 -24,6 +24,7 @@@
   #include <linux/string.h>
   #include <linux/backing-dev.h>
   #include <linux/mpage.h>
+ +#include <linux/aio.h>
   #include <linux/falloc.h>
   #include <linux/swap.h>
   #include <linux/writeback.h>
@@@ -309,10 -308,6 +309,6 @@@ static int __btrfs_run_defrag_inode(str
                 ret = PTR_ERR(inode_root);
                 goto cleanup;
         }
-       if (btrfs_root_refs(&inode_root->root_item) == 0) {
-               ret = -ENOENT;
-               goto cleanup;
-       }
   
         key.objectid = defrag->ino;
         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
@@@ -1317,6 -1312,56 +1313,56 @@@ fail
   
   }
   
+ static noinline int check_can_nocow(struct inode *inode, loff_t pos,
+                                   size_t *write_bytes)
+ {
+       struct btrfs_trans_handle *trans;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_ordered_extent *ordered;
+       u64 lockstart, lockend;
+       u64 num_bytes;
+       int ret;
+ 
+       lockstart = round_down(pos, root->sectorsize);
+       lockend = lockstart + round_up(*write_bytes, root->sectorsize) - 1;
+ 
+       while (1) {
+               lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
+               ordered = btrfs_lookup_ordered_range(inode, lockstart,
+                                                    lockend - lockstart + 1);
+               if (!ordered) {
+                       break;
+               }
+               unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
+               btrfs_start_ordered_extent(inode, ordered, 1);
+               btrfs_put_ordered_extent(ordered);
+       }
+ 
+       trans = btrfs_join_transaction(root);
+       if (IS_ERR(trans)) {
+               unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
+               return PTR_ERR(trans);
+       }
+ 
+       num_bytes = lockend - lockstart + 1;
+       ret = can_nocow_extent(trans, inode, lockstart, &num_bytes, NULL, NULL,
+                              NULL);
+       btrfs_end_transaction(trans, root);
+       if (ret <= 0) {
+               ret = 0;
+       } else {
+               clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                                EXTENT_DIRTY | EXTENT_DELALLOC |
+                                EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0,
+                                NULL, GFP_NOFS);
+               *write_bytes = min_t(size_t, *write_bytes, num_bytes);
+       }
+ 
+       unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
+ 
+       return ret;
+ }
+ 
   static noinline ssize_t __btrfs_buffered_write(struct file *file,
                                                struct iov_iter *i,
                                                loff_t pos)
@@@ -1324,10 -1369,12 +1370,12 @@@
         struct inode *inode = file_inode(file);
         struct btrfs_root *root = BTRFS_I(inode)->root;
         struct page **pages = NULL;
+       u64 release_bytes = 0;
         unsigned long first_index;
         size_t num_written = 0;
         int nrptrs;
         int ret = 0;
+       bool only_release_metadata = false;
         bool force_page_uptodate = false;
   
         nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
@@@ -1348,6 -1395,7 +1396,7 @@@
                                          offset);
                 size_t num_pages = (write_bytes + offset +
                                     PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+               size_t reserve_bytes;
                 size_t dirty_pages;
                 size_t copied;
   
@@@ -1362,11 -1410,41 +1411,41 @@@
                         break;
                 }
   
-               ret = btrfs_delalloc_reserve_space(inode,
-                                       num_pages << PAGE_CACHE_SHIFT);
+               reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
+               ret = btrfs_check_data_free_space(inode, reserve_bytes);
+               if (ret == -ENOSPC &&
+                   (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
+                                             BTRFS_INODE_PREALLOC))) {
+                       ret = check_can_nocow(inode, pos, &write_bytes);
+                       if (ret > 0) {
+                               only_release_metadata = true;
+                               /*
+                                * our prealloc extent may be smaller than
+                                * write_bytes, so scale down.
+                                */
+                               num_pages = (write_bytes + offset +
+                                            PAGE_CACHE_SIZE - 1) >>
+                                       PAGE_CACHE_SHIFT;
+                               reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
+                               ret = 0;
+                       } else {
+                               ret = -ENOSPC;
+                       }
+               }
+ 
                 if (ret)
                         break;
   
+               ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes);
+               if (ret) {
+                       if (!only_release_metadata)
+                               btrfs_free_reserved_data_space(inode,
+                                                              reserve_bytes);
+                       break;
+               }
+ 
+               release_bytes = reserve_bytes;
+ 
                 /*
                  * This is going to setup the pages array with the number of
                  * pages we want, so we don't really need to worry about the
@@@ -1375,11 -1453,8 +1454,8 @@@
                 ret = prepare_pages(root, file, pages, num_pages,
                                     pos, first_index, write_bytes,
                                     force_page_uptodate);
-               if (ret) {
-                       btrfs_delalloc_release_space(inode,
-                                       num_pages << PAGE_CACHE_SHIFT);
+               if (ret)
                         break;
-               }
   
                 copied = btrfs_copy_from_user(pos, num_pages,
                                            write_bytes, pages, i);
@@@ -1409,30 -1484,46 +1485,46 @@@
                  * managed to copy.
                  */
                 if (num_pages > dirty_pages) {
+                       release_bytes = (num_pages - dirty_pages) <<
+                               PAGE_CACHE_SHIFT;
                         if (copied > 0) {
                                 spin_lock(&BTRFS_I(inode)->lock);
                                 BTRFS_I(inode)->outstanding_extents++;
                                 spin_unlock(&BTRFS_I(inode)->lock);
                         }
-                       btrfs_delalloc_release_space(inode,
-                                       (num_pages - dirty_pages) <<
-                                       PAGE_CACHE_SHIFT);
+                       if (only_release_metadata)
+                               btrfs_delalloc_release_metadata(inode,
+                                                               release_bytes);
+                       else
+                               btrfs_delalloc_release_space(inode,
+                                                            release_bytes);
                 }
   
+               release_bytes = dirty_pages << PAGE_CACHE_SHIFT;
                 if (copied > 0) {
                         ret = btrfs_dirty_pages(root, inode, pages,
                                                 dirty_pages, pos, copied,
                                                 NULL);
                         if (ret) {
-                               btrfs_delalloc_release_space(inode,
-                                       dirty_pages << PAGE_CACHE_SHIFT);
                                 btrfs_drop_pages(pages, num_pages);
                                 break;
                         }
                 }
   
+               release_bytes = 0;
                 btrfs_drop_pages(pages, num_pages);
   
+               if (only_release_metadata && copied > 0) {
+                       u64 lockstart = round_down(pos, root->sectorsize);
+                       u64 lockend = lockstart +
+                               (dirty_pages << PAGE_CACHE_SHIFT) - 1;
+ 
+                       set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+                                      lockend, EXTENT_NORESERVE, NULL,
+                                      NULL, GFP_NOFS);
+                       only_release_metadata = false;
+               }
+ 
                 cond_resched();
   
                 balance_dirty_pages_ratelimited(inode->i_mapping);
@@@ -1445,6 -1536,13 +1537,13 @@@
   
         kfree(pages);
   
+       if (release_bytes) {
+               if (only_release_metadata)
+                       btrfs_delalloc_release_metadata(inode, release_bytes);
+               else
+                       btrfs_delalloc_release_space(inode, release_bytes);
+       }
+ 
         return num_written ? num_written : ret;
   }
   
@@@ -1518,6 -1616,8 +1617,6 @@@ static ssize_t btrfs_file_aio_write(str
         size_t count, ocount;
         bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
   
- -      sb_start_write(inode->i_sb);
- -
         mutex_lock(&inode->i_mutex);
   
         err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
@@@ -1619,6 -1719,7 +1718,6 @@@
         if (sync)
                 atomic_dec(&BTRFS_I(inode)->sync_writers);
   out:
- -      sb_end_write(inode->i_sb);
         current->backing_dev_info = NULL;
         return num_written ? num_written : err;
   }
@@@ -2175,12 -2276,6 +2274,6 @@@ static long btrfs_fallocate(struct fil
                         goto out_reserve_fail;
         }
   
-       /*
-        * wait for ordered IO before we have any locks.  We'll loop again
-        * below with the locks held.
-        */
-       btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
- 
         mutex_lock(&inode->i_mutex);
         ret = inode_newsize_ok(inode, alloc_end);
         if (ret)
@@@ -2191,8 -2286,23 +2284,23 @@@
                                         alloc_start);
                 if (ret)
                         goto out;
+       } else {
+               /*
+                * If we are fallocating from the end of the file onward we
+                * need to zero out the end of the page if i_size lands in the
+                * middle of a page.
+                */
+               ret = btrfs_truncate_page(inode, inode->i_size, 0, 0);
+               if (ret)
+                       goto out;
         }
   
+       /*
+        * wait for ordered IO before we have any locks.  We'll loop again
+        * below with the locks held.
+        */
+       btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
+ 
         locked_end = alloc_end - 1;
         while (1) {
                 struct btrfs_ordered_extent *ordered;
@@@ -2425,7 -2535,20 +2533,7 @@@ static loff_t btrfs_file_llseek(struct 
                 }
         }
   
- -      if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) {
- -              offset = -EINVAL;
- -              goto out;
- -      }
- -      if (offset > inode->i_sb->s_maxbytes) {
- -              offset = -EINVAL;
- -              goto out;
- -      }
- -
- -      /* Special lock needed here? */
- -      if (offset != file->f_pos) {
- -              file->f_pos = offset;
- -              file->f_version = 0;
- -      }
+ +      offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
   out:
         mutex_unlock(&inode->i_mutex);
         return offset;
diff --combined fs/btrfs/free-space-cache.c

index 2750b50235269d2304ce45a906b2c5b43dae8b5c,75172853d7139a0d136d95abc420df4ab2e816ea..b21a3cd667d8cc656878b8d462aa7cd45ebc8435
--- 1/fs/btrfs/free-space-cache.c
--- 2/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@@ -213,7 -213,7 +213,7 @@@ int btrfs_check_trunc_cache_free_space(
         else
                 ret = 0;
         spin_unlock(&rsv->lock);
-       return 0;
+       return ret;
   }
   
   int btrfs_truncate_free_space_cache(struct btrfs_root *root,
@@@ -3150,6 -3150,8 +3150,8 @@@ again
         return 0;
   }
   
+ #define test_msg(fmt, ...) printk(KERN_INFO "btrfs: selftest: " fmt, ##__VA_ARGS__)
+ 
   /*
    * This test just does basic sanity checking, making sure we can add an extent
    * entry and remove space from either end and the middle, and make sure we can
@@@ -3159,63 -3161,63 +3161,63 @@@ static int test_extents(struct btrfs_bl
   {
         int ret = 0;
   
-       printk(KERN_ERR "Running extent only tests\n");
+       test_msg("Running extent only tests\n");
   
         /* First just make sure we can remove an entire entry */
         ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
         if (ret) {
-               printk(KERN_ERR "Error adding initial extents %d\n", ret);
+               test_msg("Error adding initial extents %d\n", ret);
                 return ret;
         }
   
         ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
         if (ret) {
-               printk(KERN_ERR "Error removing extent %d\n", ret);
+               test_msg("Error removing extent %d\n", ret);
                 return ret;
         }
   
         if (check_exists(cache, 0, 4 * 1024 * 1024)) {
-               printk(KERN_ERR "Full remove left some lingering space\n");
+               test_msg("Full remove left some lingering space\n");
                 return -1;
         }
   
         /* Ok edge and middle cases now */
         ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
         if (ret) {
-               printk(KERN_ERR "Error adding half extent %d\n", ret);
+               test_msg("Error adding half extent %d\n", ret);
                 return ret;
         }
   
         ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024);
         if (ret) {
-               printk(KERN_ERR "Error removing tail end %d\n", ret);
+               test_msg("Error removing tail end %d\n", ret);
                 return ret;
         }
   
         ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
         if (ret) {
-               printk(KERN_ERR "Error removing front end %d\n", ret);
+               test_msg("Error removing front end %d\n", ret);
                 return ret;
         }
   
         ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096);
         if (ret) {
-               printk(KERN_ERR "Error removing middle piece %d\n", ret);
- -              test_msg("Error removing middle peice %d\n", ret);
++              test_msg("Error removing middle piece %d\n", ret);
                 return ret;
         }
   
         if (check_exists(cache, 0, 1 * 1024 * 1024)) {
-               printk(KERN_ERR "Still have space at the front\n");
+               test_msg("Still have space at the front\n");
                 return -1;
         }
   
         if (check_exists(cache, 2 * 1024 * 1024, 4096)) {
-               printk(KERN_ERR "Still have space in the middle\n");
+               test_msg("Still have space in the middle\n");
                 return -1;
         }
   
         if (check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) {
-               printk(KERN_ERR "Still have space at the end\n");
+               test_msg("Still have space at the end\n");
                 return -1;
         }
   
@@@ -3230,34 -3232,34 +3232,34 @@@ static int test_bitmaps(struct btrfs_bl
         u64 next_bitmap_offset;
         int ret;
   
-       printk(KERN_ERR "Running bitmap only tests\n");
+       test_msg("Running bitmap only tests\n");
   
         ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
         if (ret) {
-               printk(KERN_ERR "Couldn't create a bitmap entry %d\n", ret);
+               test_msg("Couldn't create a bitmap entry %d\n", ret);
                 return ret;
         }
   
         ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
         if (ret) {
-               printk(KERN_ERR "Error removing bitmap full range %d\n", ret);
+               test_msg("Error removing bitmap full range %d\n", ret);
                 return ret;
         }
   
         if (check_exists(cache, 0, 4 * 1024 * 1024)) {
-               printk(KERN_ERR "Left some space in bitmap\n");
+               test_msg("Left some space in bitmap\n");
                 return -1;
         }
   
         ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
         if (ret) {
-               printk(KERN_ERR "Couldn't add to our bitmap entry %d\n", ret);
+               test_msg("Couldn't add to our bitmap entry %d\n", ret);
                 return ret;
         }
   
         ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024);
         if (ret) {
-               printk(KERN_ERR "Couldn't remove middle chunk %d\n", ret);
+               test_msg("Couldn't remove middle chunk %d\n", ret);
                 return ret;
         }
   
@@@ -3271,21 -3273,21 +3273,21 @@@
         ret = add_free_space_entry(cache, next_bitmap_offset -
                                    (2 * 1024 * 1024), 4 * 1024 * 1024, 1);
         if (ret) {
-               printk(KERN_ERR "Couldn't add space that straddles two bitmaps"
-                      " %d\n", ret);
+               test_msg("Couldn't add space that straddles two bitmaps %d\n",
+                               ret);
                 return ret;
         }
   
         ret = btrfs_remove_free_space(cache, next_bitmap_offset -
                                       (1 * 1024 * 1024), 2 * 1024 * 1024);
         if (ret) {
-               printk(KERN_ERR "Couldn't remove overlapping space %d\n", ret);
+               test_msg("Couldn't remove overlapping space %d\n", ret);
                 return ret;
         }
   
         if (check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024),
                          2 * 1024 * 1024)) {
-               printk(KERN_ERR "Left some space when removing overlapping\n");
+               test_msg("Left some space when removing overlapping\n");
                 return -1;
         }
   
@@@ -3300,7 -3302,7 +3302,7 @@@ static int test_bitmaps_and_extents(str
         u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
         int ret;
   
-       printk(KERN_ERR "Running bitmap and extent tests\n");
+       test_msg("Running bitmap and extent tests\n");
   
         /*
          * First let's do something simple, an extent at the same offset as the
@@@ -3309,42 -3311,42 +3311,42 @@@
          */
         ret = add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1);
         if (ret) {
-               printk(KERN_ERR "Couldn't create bitmap entry %d\n", ret);
+               test_msg("Couldn't create bitmap entry %d\n", ret);
                 return ret;
         }
   
         ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
         if (ret) {
-               printk(KERN_ERR "Couldn't add extent entry %d\n", ret);
+               test_msg("Couldn't add extent entry %d\n", ret);
                 return ret;
         }
   
         ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
         if (ret) {
-               printk(KERN_ERR "Couldn't remove extent entry %d\n", ret);
+               test_msg("Couldn't remove extent entry %d\n", ret);
                 return ret;
         }
   
         if (check_exists(cache, 0, 1 * 1024 * 1024)) {
-               printk(KERN_ERR "Left remnants after our remove\n");
+               test_msg("Left remnants after our remove\n");
                 return -1;
         }
   
         /* Now to add back the extent entry and remove from the bitmap */
         ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
         if (ret) {
-               printk(KERN_ERR "Couldn't re-add extent entry %d\n", ret);
+               test_msg("Couldn't re-add extent entry %d\n", ret);
                 return ret;
         }
   
         ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024);
         if (ret) {
-               printk(KERN_ERR "Couldn't remove from bitmap %d\n", ret);
+               test_msg("Couldn't remove from bitmap %d\n", ret);
                 return ret;
         }
   
         if (check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) {
-               printk(KERN_ERR "Left remnants in the bitmap\n");
+               test_msg("Left remnants in the bitmap\n");
                 return -1;
         }
   
@@@ -3354,19 -3356,18 +3356,18 @@@
          */
         ret = add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1);
         if (ret) {
-               printk(KERN_ERR "Couldn't add to a bitmap %d\n", ret);
+               test_msg("Couldn't add to a bitmap %d\n", ret);
                 return ret;
         }
   
         ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024);
         if (ret) {
-               printk(KERN_ERR "Couldn't remove overlapping space %d\n", ret);
+               test_msg("Couldn't remove overlapping space %d\n", ret);
                 return ret;
         }
   
         if (check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) {
-               printk(KERN_ERR "Left over peices after removing "
-                      "overlapping\n");
+               test_msg("Left over peices after removing overlapping\n");
                 return -1;
         }
   
@@@ -3375,24 -3376,24 +3376,24 @@@
         /* Now with the extent entry offset into the bitmap */
         ret = add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1);
         if (ret) {
-               printk(KERN_ERR "Couldn't add space to the bitmap %d\n", ret);
+               test_msg("Couldn't add space to the bitmap %d\n", ret);
                 return ret;
         }
   
         ret = add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0);
         if (ret) {
-               printk(KERN_ERR "Couldn't add extent to the cache %d\n", ret);
+               test_msg("Couldn't add extent to the cache %d\n", ret);
                 return ret;
         }
   
         ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024);
         if (ret) {
-               printk(KERN_ERR "Problem removing overlapping space %d\n", ret);
+               test_msg("Problem removing overlapping space %d\n", ret);
                 return ret;
         }
   
         if (check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) {
-               printk(KERN_ERR "Left something behind when removing space");
+               test_msg("Left something behind when removing space");
                 return -1;
         }
   
@@@ -3410,27 -3411,27 +3411,27 @@@
         ret = add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024,
                                    4 * 1024 * 1024, 1);
         if (ret) {
-               printk(KERN_ERR "Couldn't add bitmap %d\n", ret);
+               test_msg("Couldn't add bitmap %d\n", ret);
                 return ret;
         }
   
         ret = add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024,
                                    5 * 1024 * 1024, 0);
         if (ret) {
-               printk(KERN_ERR "Couldn't add extent entry %d\n", ret);
+               test_msg("Couldn't add extent entry %d\n", ret);
                 return ret;
         }
   
         ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024,
                                       5 * 1024 * 1024);
         if (ret) {
-               printk(KERN_ERR "Failed to free our space %d\n", ret);
+               test_msg("Failed to free our space %d\n", ret);
                 return ret;
         }
   
         if (check_exists(cache, bitmap_offset + 1 * 1024 * 1024,
                          5 * 1024 * 1024)) {
-               printk(KERN_ERR "Left stuff over\n");
+               test_msg("Left stuff over\n");
                 return -1;
         }
   
@@@ -3444,20 -3445,19 +3445,19 @@@
          */
         ret = add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1);
         if (ret) {
-               printk(KERN_ERR "Couldn't add bitmap entry %d\n", ret);
+               test_msg("Couldn't add bitmap entry %d\n", ret);
                 return ret;
         }
   
         ret = add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0);
         if (ret) {
-               printk(KERN_ERR "Couldn't add extent entry %d\n", ret);
+               test_msg("Couldn't add extent entry %d\n", ret);
                 return ret;
         }
   
         ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024);
         if (ret) {
-               printk(KERN_ERR "Error removing bitmap and extent "
-                      "overlapping %d\n", ret);
+               test_msg("Error removing bitmap and extent overlapping %d\n", ret);
                 return ret;
         }
   
@@@ -3469,11 -3469,11 +3469,11 @@@ void btrfs_test_free_space_cache(void
   {
         struct btrfs_block_group_cache *cache;
   
-       printk(KERN_ERR "Running btrfs free space cache tests\n");
+       test_msg("Running btrfs free space cache tests\n");
   
         cache = init_test_block_group();
         if (!cache) {
-               printk(KERN_ERR "Couldn't run the tests\n");
+               test_msg("Couldn't run the tests\n");
                 return;
         }
   
@@@ -3487,6 -3487,9 +3487,9 @@@ out
         __btrfs_remove_free_space_cache(cache->free_space_ctl);
         kfree(cache->free_space_ctl);
         kfree(cache);
-       printk(KERN_ERR "Free space cache tests finished\n");
+       test_msg("Free space cache tests finished\n");
   }
- #endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
+ #undef test_msg
+ #else /* !CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
+ void btrfs_test_free_space_cache(void) {}
+ #endif /* !CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
diff --combined fs/btrfs/inode.c

index 4f9d16b70d3d87da9dd6e3cae926dbaaf4fa3345,55dda871437fdae659ec710cd7d4a03125104165..6d1b93c8aafb8a4d7b832cab8585ebf1ac1ced42
--- 1/fs/btrfs/inode.c
--- 2/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@@ -32,7 -32,6 +32,7 @@@
   #include <linux/writeback.h>
   #include <linux/statfs.h>
   #include <linux/compat.h>
+ +#include <linux/aio.h>
   #include <linux/bit_spinlock.h>
   #include <linux/xattr.h>
   #include <linux/posix_acl.h>
@@@ -42,6 -41,7 +42,7 @@@
   #include <linux/mount.h>
   #include <linux/btrfs.h>
   #include <linux/blkdev.h>
+ #include <linux/posix_acl_xattr.h>
   #include "compat.h"
   #include "ctree.h"
   #include "disk-io.h"
@@@ -57,6 -57,7 +58,7 @@@
   #include "free-space-cache.h"
   #include "inode-map.h"
   #include "backref.h"
+ #include "hash.h"
   
   struct btrfs_iget_args {
         u64 ino;
@@@ -701,8 -702,12 +703,12 @@@ retry
                         async_extent->nr_pages = 0;
                         async_extent->pages = NULL;
   
-                       if (ret == -ENOSPC)
+                       if (ret == -ENOSPC) {
+                               unlock_extent(io_tree, async_extent->start,
+                                             async_extent->start +
+                                             async_extent->ram_size - 1);
                                 goto retry;
+                       }
                         goto out_free;
                 }
   
@@@ -1529,6 -1534,46 +1535,46 @@@ static void btrfs_merge_extent_hook(str
         spin_unlock(&BTRFS_I(inode)->lock);
   }
   
+ static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
+                                     struct inode *inode)
+ {
+       spin_lock(&root->delalloc_lock);
+       if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
+               list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
+                             &root->delalloc_inodes);
+               set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+                       &BTRFS_I(inode)->runtime_flags);
+               root->nr_delalloc_inodes++;
+               if (root->nr_delalloc_inodes == 1) {
+                       spin_lock(&root->fs_info->delalloc_root_lock);
+                       BUG_ON(!list_empty(&root->delalloc_root));
+                       list_add_tail(&root->delalloc_root,
+                                     &root->fs_info->delalloc_roots);
+                       spin_unlock(&root->fs_info->delalloc_root_lock);
+               }
+       }
+       spin_unlock(&root->delalloc_lock);
+ }
+ 
+ static void btrfs_del_delalloc_inode(struct btrfs_root *root,
+                                    struct inode *inode)
+ {
+       spin_lock(&root->delalloc_lock);
+       if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
+               list_del_init(&BTRFS_I(inode)->delalloc_inodes);
+               clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+                         &BTRFS_I(inode)->runtime_flags);
+               root->nr_delalloc_inodes--;
+               if (!root->nr_delalloc_inodes) {
+                       spin_lock(&root->fs_info->delalloc_root_lock);
+                       BUG_ON(list_empty(&root->delalloc_root));
+                       list_del_init(&root->delalloc_root);
+                       spin_unlock(&root->fs_info->delalloc_root_lock);
+               }
+       }
+       spin_unlock(&root->delalloc_lock);
+ }
+ 
   /*
    * extent_io.c set_bit_hook, used to track delayed allocation
    * bytes in this file, and to maintain the list of inodes that
@@@ -1561,16 -1606,8 +1607,8 @@@ static void btrfs_set_bit_hook(struct i
                 spin_lock(&BTRFS_I(inode)->lock);
                 BTRFS_I(inode)->delalloc_bytes += len;
                 if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
-                                        &BTRFS_I(inode)->runtime_flags)) {
-                       spin_lock(&root->fs_info->delalloc_lock);
-                       if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
-                               list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
-                                             &root->fs_info->delalloc_inodes);
-                               set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
-                                       &BTRFS_I(inode)->runtime_flags);
-                       }
-                       spin_unlock(&root->fs_info->delalloc_lock);
-               }
+                                        &BTRFS_I(inode)->runtime_flags))
+                       btrfs_add_delalloc_inodes(root, inode);
                 spin_unlock(&BTRFS_I(inode)->lock);
         }
   }
@@@ -1604,7 -1641,7 +1642,7 @@@ static void btrfs_clear_bit_hook(struc
                         btrfs_delalloc_release_metadata(inode, len);
   
                 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
-                   && do_list)
+                   && do_list && !(state->state & EXTENT_NORESERVE))
                         btrfs_free_reserved_data_space(inode, len);
   
                 __percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
@@@ -1613,15 -1650,8 +1651,8 @@@
                 BTRFS_I(inode)->delalloc_bytes -= len;
                 if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
                     test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
-                            &BTRFS_I(inode)->runtime_flags)) {
-                       spin_lock(&root->fs_info->delalloc_lock);
-                       if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
-                               list_del_init(&BTRFS_I(inode)->delalloc_inodes);
-                               clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
-                                         &BTRFS_I(inode)->runtime_flags);
-                       }
-                       spin_unlock(&root->fs_info->delalloc_lock);
-               }
+                            &BTRFS_I(inode)->runtime_flags))
+                       btrfs_del_delalloc_inode(root, inode);
                 spin_unlock(&BTRFS_I(inode)->lock);
         }
   }
@@@ -2263,11 -2293,6 +2294,6 @@@ static noinline int relink_extent_backr
                         return 0;
                 return PTR_ERR(root);
         }
-       if (btrfs_root_refs(&root->root_item) == 0) {
-               srcu_read_unlock(&fs_info->subvol_srcu, index);
-               /* parse ENOENT to 0 */
-               return 0;
-       }
   
         /* step 2: get inode */
         key.objectid = backref->inum;
@@@ -3215,13 -3240,16 +3241,16 @@@ int btrfs_orphan_cleanup(struct btrfs_r
                         /* 1 for the orphan item deletion. */
                         trans = btrfs_start_transaction(root, 1);
                         if (IS_ERR(trans)) {
+                               iput(inode);
                                 ret = PTR_ERR(trans);
                                 goto out;
                         }
                         ret = btrfs_orphan_add(trans, inode);
                         btrfs_end_transaction(trans, root);
-                       if (ret)
+                       if (ret) {
+                               iput(inode);
                                 goto out;
+                       }
   
                         ret = btrfs_truncate(inode);
                         if (ret)
@@@ -3274,8 -3302,17 +3303,17 @@@ static noinline int acls_after_inode_it
   {
         u32 nritems = btrfs_header_nritems(leaf);
         struct btrfs_key found_key;
+       static u64 xattr_access = 0;
+       static u64 xattr_default = 0;
         int scanned = 0;
   
+       if (!xattr_access) {
+               xattr_access = btrfs_name_hash(POSIX_ACL_XATTR_ACCESS,
+                                       strlen(POSIX_ACL_XATTR_ACCESS));
+               xattr_default = btrfs_name_hash(POSIX_ACL_XATTR_DEFAULT,
+                                       strlen(POSIX_ACL_XATTR_DEFAULT));
+       }
+ 
         slot++;
         while (slot < nritems) {
                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
@@@ -3285,8 -3322,11 +3323,11 @@@
                         return 0;
   
                 /* we found an xattr, assume we've got an acl */
-               if (found_key.type == BTRFS_XATTR_ITEM_KEY)
-                       return 1;
+               if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
+                       if (found_key.offset == xattr_access ||
+                           found_key.offset == xattr_default)
+                               return 1;
+               }
   
                 /*
                  * we found a key greater than an xattr key, there can't
@@@ -3660,53 -3700,20 +3701,20 @@@ int btrfs_unlink_inode(struct btrfs_tra
         }
         return ret;
   }
-               
- 
- /* helper to check if there is any shared block in the path */
- static int check_path_shared(struct btrfs_root *root,
-                            struct btrfs_path *path)
- {
-       struct extent_buffer *eb;
-       int level;
-       u64 refs = 1;
- 
-       for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
-               int ret;
- 
-               if (!path->nodes[level])
-                       break;
-               eb = path->nodes[level];
-               if (!btrfs_block_can_be_shared(root, eb))
-                       continue;
-               ret = btrfs_lookup_extent_info(NULL, root, eb->start, level, 1,
-                                              &refs, NULL);
-               if (refs > 1)
-                       return 1;
-       }
-       return 0;
- }
   
   /*
    * helper to start transaction for unlink and rmdir.
    *
-  * unlink and rmdir are special in btrfs, they do not always free space.
-  * so in enospc case, we should make sure they will free space before
-  * allowing them to use the global metadata reservation.
+  * unlink and rmdir are special in btrfs, they do not always free space, so
+  * if we cannot make our reservations the normal way try and see if there is
+  * plenty of slack room in the global reserve to migrate, otherwise we cannot
+  * allow the unlink to occur.
    */
- static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
-                                                      struct dentry *dentry)
+ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
   {
         struct btrfs_trans_handle *trans;
         struct btrfs_root *root = BTRFS_I(dir)->root;
-       struct btrfs_path *path;
-       struct btrfs_dir_item *di;
-       struct inode *inode = dentry->d_inode;
-       u64 index;
-       int check_link = 1;
-       int err = -ENOSPC;
         int ret;
-       u64 ino = btrfs_ino(inode);
-       u64 dir_ino = btrfs_ino(dir);
   
         /*
          * 1 for the possible orphan item
@@@ -3719,158 -3726,23 +3727,23 @@@
         if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
                 return trans;
   
-       if (ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
-               return ERR_PTR(-ENOSPC);
- 
-       /* check if there is someone else holds reference */
-       if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1)
-               return ERR_PTR(-ENOSPC);
- 
-       if (atomic_read(&inode->i_count) > 2)
-               return ERR_PTR(-ENOSPC);
- 
-       if (xchg(&root->fs_info->enospc_unlink, 1))
-               return ERR_PTR(-ENOSPC);
- 
-       path = btrfs_alloc_path();
-       if (!path) {
-               root->fs_info->enospc_unlink = 0;
-               return ERR_PTR(-ENOMEM);
-       }
+       if (PTR_ERR(trans) == -ENOSPC) {
+               u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5);
   
-       /* 1 for the orphan item */
-       trans = btrfs_start_transaction(root, 1);
-       if (IS_ERR(trans)) {
-               btrfs_free_path(path);
-               root->fs_info->enospc_unlink = 0;
-               return trans;
-       }
- 
-       path->skip_locking = 1;
-       path->search_commit_root = 1;
- 
-       ret = btrfs_lookup_inode(trans, root, path,
-                               &BTRFS_I(dir)->location, 0);
-       if (ret < 0) {
-               err = ret;
-               goto out;
-       }
-       if (ret == 0) {
-               if (check_path_shared(root, path))
-                       goto out;
-       } else {
-               check_link = 0;
-       }
-       btrfs_release_path(path);
- 
-       ret = btrfs_lookup_inode(trans, root, path,
-                               &BTRFS_I(inode)->location, 0);
-       if (ret < 0) {
-               err = ret;
-               goto out;
-       }
-       if (ret == 0) {
-               if (check_path_shared(root, path))
-                       goto out;
-       } else {
-               check_link = 0;
-       }
-       btrfs_release_path(path);
- 
-       if (ret == 0 && S_ISREG(inode->i_mode)) {
-               ret = btrfs_lookup_file_extent(trans, root, path,
-                                              ino, (u64)-1, 0);
-               if (ret < 0) {
-                       err = ret;
-                       goto out;
+               trans = btrfs_start_transaction(root, 0);
+               if (IS_ERR(trans))
+                       return trans;
+               ret = btrfs_cond_migrate_bytes(root->fs_info,
+                                              &root->fs_info->trans_block_rsv,
+                                              num_bytes, 5);
+               if (ret) {
+                       btrfs_end_transaction(trans, root);
+                       return ERR_PTR(ret);
                 }
-               BUG_ON(ret == 0); /* Corruption */
-               if (check_path_shared(root, path))
-                       goto out;
-               btrfs_release_path(path);
-       }
- 
-       if (!check_link) {
-               err = 0;
-               goto out;
-       }
- 
-       di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
-                               dentry->d_name.name, dentry->d_name.len, 0);
-       if (IS_ERR(di)) {
-               err = PTR_ERR(di);
-               goto out;
-       }
-       if (di) {
-               if (check_path_shared(root, path))
-                       goto out;
-       } else {
-               err = 0;
-               goto out;
-       }
-       btrfs_release_path(path);
- 
-       ret = btrfs_get_inode_ref_index(trans, root, path, dentry->d_name.name,
-                                       dentry->d_name.len, ino, dir_ino, 0,
-                                       &index);
-       if (ret) {
-               err = ret;
-               goto out;
-       }
- 
-       if (check_path_shared(root, path))
-               goto out;
- 
-       btrfs_release_path(path);
- 
-       /*
-        * This is a commit root search, if we can lookup inode item and other
-        * relative items in the commit root, it means the transaction of
-        * dir/file creation has been committed, and the dir index item that we
-        * delay to insert has also been inserted into the commit root. So
-        * we needn't worry about the delayed insertion of the dir index item
-        * here.
-        */
-       di = btrfs_lookup_dir_index_item(trans, root, path, dir_ino, index,
-                               dentry->d_name.name, dentry->d_name.len, 0);
-       if (IS_ERR(di)) {
-               err = PTR_ERR(di);
-               goto out;
-       }
-       BUG_ON(ret == -ENOENT);
-       if (check_path_shared(root, path))
-               goto out;
- 
-       err = 0;
- out:
-       btrfs_free_path(path);
-       /* Migrate the orphan reservation over */
-       if (!err)
-               err = btrfs_block_rsv_migrate(trans->block_rsv,
-                               &root->fs_info->global_block_rsv,
-                               trans->bytes_reserved);
- 
-       if (err) {
-               btrfs_end_transaction(trans, root);
-               root->fs_info->enospc_unlink = 0;
-               return ERR_PTR(err);
-       }
- 
-       trans->block_rsv = &root->fs_info->global_block_rsv;
-       return trans;
- }
- 
- static void __unlink_end_trans(struct btrfs_trans_handle *trans,
-                              struct btrfs_root *root)
- {
-       if (trans->block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL) {
-               btrfs_block_rsv_release(root, trans->block_rsv,
-                                       trans->bytes_reserved);
                 trans->block_rsv = &root->fs_info->trans_block_rsv;
-               BUG_ON(!root->fs_info->enospc_unlink);
-               root->fs_info->enospc_unlink = 0;
+               trans->bytes_reserved = num_bytes;
         }
-       btrfs_end_transaction(trans, root);
+       return trans;
   }
   
   static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
@@@ -3880,7 -3752,7 +3753,7 @@@
         struct inode *inode = dentry->d_inode;
         int ret;
   
-       trans = __unlink_start_trans(dir, dentry);
+       trans = __unlink_start_trans(dir);
         if (IS_ERR(trans))
                 return PTR_ERR(trans);
   
@@@ -3898,7 -3770,7 +3771,7 @@@
         }
   
   out:
-       __unlink_end_trans(trans, root);
+       btrfs_end_transaction(trans, root);
         btrfs_btree_balance_dirty(root);
         return ret;
   }
@@@ -3995,7 -3867,7 +3868,7 @@@ static int btrfs_rmdir(struct inode *di
         if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
                 return -EPERM;
   
-       trans = __unlink_start_trans(dir, dentry);
+       trans = __unlink_start_trans(dir);
         if (IS_ERR(trans))
                 return PTR_ERR(trans);
   
@@@ -4017,7 -3889,7 +3890,7 @@@
         if (!err)
                 btrfs_i_size_write(inode, 0);
   out:
-       __unlink_end_trans(trans, root);
+       btrfs_end_transaction(trans, root);
         btrfs_btree_balance_dirty(root);
   
         return err;
@@@ -4395,6 -4267,15 +4268,15 @@@ int btrfs_cont_expand(struct inode *ino
         u64 hole_size;
         int err = 0;
   
+       /*
+        * If our size started in the middle of a page we need to zero out the
+        * rest of the page before we expand the i_size, otherwise we could
+        * expose stale data.
+        */
+       err = btrfs_truncate_page(inode, oldsize, 0, 0);
+       if (err)
+               return err;
+ 
         if (size <= hole_start)
                 return 0;
   
@@@ -4822,11 -4703,6 +4704,6 @@@ static int fixup_tree_root_location(str
                 goto out;
         }
   
-       if (btrfs_root_refs(&new_root->root_item) == 0) {
-               err = -ENOENT;
-               goto out;
-       }
- 
         *sub_root = new_root;
         location->objectid = btrfs_root_dirid(&new_root->root_item);
         location->type = BTRFS_INODE_ITEM_KEY;
@@@ -5092,8 -4968,10 +4969,10 @@@ struct inode *btrfs_lookup_dentry(struc
                 if (!(inode->i_sb->s_flags & MS_RDONLY))
                         ret = btrfs_orphan_cleanup(sub_root);
                 up_read(&root->fs_info->cleanup_work_sem);
-               if (ret)
+               if (ret) {
+                       iput(inode);
                         inode = ERR_PTR(ret);
+               }
         }
   
         return inode;
@@@ -5137,9 -5015,10 +5016,9 @@@ unsigned char btrfs_filetype_table[] = 
         DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
   };
   
- -static int btrfs_real_readdir(struct file *filp, void *dirent,
- -                            filldir_t filldir)
+ +static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
   {
- -      struct inode *inode = file_inode(filp);
+ +      struct inode *inode = file_inode(file);
         struct btrfs_root *root = BTRFS_I(inode)->root;
         struct btrfs_item *item;
         struct btrfs_dir_item *di;
@@@ -5160,15 -5039,29 +5039,15 @@@
         char tmp_name[32];
         char *name_ptr;
         int name_len;
- -      int is_curr = 0;        /* filp->f_pos points to the current index? */
+ +      int is_curr = 0;        /* ctx->pos points to the current index? */
   
         /* FIXME, use a real flag for deciding about the key type */
         if (root->fs_info->tree_root == root)
                 key_type = BTRFS_DIR_ITEM_KEY;
   
- -      /* special case for "." */
- -      if (filp->f_pos == 0) {
- -              over = filldir(dirent, ".", 1,
- -                             filp->f_pos, btrfs_ino(inode), DT_DIR);
- -              if (over)
- -                      return 0;
- -              filp->f_pos = 1;
- -      }
- -      /* special case for .., just use the back ref */
- -      if (filp->f_pos == 1) {
- -              u64 pino = parent_ino(filp->f_path.dentry);
- -              over = filldir(dirent, "..", 2,
- -                             filp->f_pos, pino, DT_DIR);
- -              if (over)
- -                      return 0;
- -              filp->f_pos = 2;
- -      }
+ +      if (!dir_emit_dots(file, ctx))
+ +              return 0;
+ +
         path = btrfs_alloc_path();
         if (!path)
                 return -ENOMEM;
@@@ -5182,7 -5075,7 +5061,7 @@@
         }
   
         btrfs_set_key_type(&key, key_type);
- -      key.offset = filp->f_pos;
+ +      key.offset = ctx->pos;
         key.objectid = btrfs_ino(inode);
   
         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@@ -5208,14 -5101,14 +5087,14 @@@
                         break;
                 if (btrfs_key_type(&found_key) != key_type)
                         break;
- -              if (found_key.offset < filp->f_pos)
+ +              if (found_key.offset < ctx->pos)
                         goto next;
                 if (key_type == BTRFS_DIR_INDEX_KEY &&
                     btrfs_should_delete_dir_index(&del_list,
                                                   found_key.offset))
                         goto next;
   
- -              filp->f_pos = found_key.offset;
+ +              ctx->pos = found_key.offset;
                 is_curr = 1;
   
                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
@@@ -5259,8 -5152,9 +5138,8 @@@
                                 over = 0;
                                 goto skip;
                         }
- -                      over = filldir(dirent, name_ptr, name_len,
- -                                     found_key.offset, location.objectid,
- -                                     d_type);
+ +                      over = !dir_emit(ctx, name_ptr, name_len,
+ +                                     location.objectid, d_type);
   
   skip:
                         if (name_ptr != tmp_name)
@@@ -5279,8 -5173,9 +5158,8 @@@ next
   
         if (key_type == BTRFS_DIR_INDEX_KEY) {
                 if (is_curr)
- -                      filp->f_pos++;
- -              ret = btrfs_readdir_delayed_dir_index(filp, dirent, filldir,
- -                                                    &ins_list);
+ +                      ctx->pos++;
+ +              ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
                 if (ret)
                         goto nopos;
         }
@@@ -5291,9 -5186,9 +5170,9 @@@
                  * 32-bit glibc will use getdents64, but then strtol -
                  * so the last number we can serve is this.
                  */
- -              filp->f_pos = 0x7fffffff;
+ +              ctx->pos = 0x7fffffff;
         else
- -              filp->f_pos++;
+ +              ctx->pos++;
   nopos:
         ret = 0;
   err:
@@@ -6501,10 -6396,10 +6380,10 @@@ out
    * returns 1 when the nocow is safe, < 1 on error, 0 if the
    * block must be cow'd
    */
- static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
-                                     struct inode *inode, u64 offset, u64 *len,
-                                     u64 *orig_start, u64 *orig_block_len,
-                                     u64 *ram_bytes)
+ noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
+                             struct inode *inode, u64 offset, u64 *len,
+                             u64 *orig_start, u64 *orig_block_len,
+                             u64 *ram_bytes)
   {
         struct btrfs_path *path;
         int ret;
@@@ -6518,7 -6413,7 +6397,7 @@@
         u64 num_bytes;
         int slot;
         int found_type;
- 
+       bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);
         path = btrfs_alloc_path();
         if (!path)
                 return -ENOMEM;
@@@ -6558,18 -6453,28 +6437,28 @@@
                 /* not a regular extent, must cow */
                 goto out;
         }
+ 
+       if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
+               goto out;
+ 
         disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+       if (disk_bytenr == 0)
+               goto out;
+ 
+       if (btrfs_file_extent_compression(leaf, fi) ||
+           btrfs_file_extent_encryption(leaf, fi) ||
+           btrfs_file_extent_other_encoding(leaf, fi))
+               goto out;
+ 
         backref_offset = btrfs_file_extent_offset(leaf, fi);
   
-       *orig_start = key.offset - backref_offset;
-       *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
-       *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
+       if (orig_start) {
+               *orig_start = key.offset - backref_offset;
+               *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
+               *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
+       }
   
         extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
-       if (extent_end < offset + *len) {
-               /* extent doesn't include our full range, must cow */
-               goto out;
-       }
   
         if (btrfs_extent_readonly(root, disk_bytenr))
                 goto out;
@@@ -6813,8 -6718,8 +6702,8 @@@ static int btrfs_get_blocks_direct(stru
                 if (IS_ERR(trans))
                         goto must_cow;
   
-               if (can_nocow_odirect(trans, inode, start, &len, &orig_start,
-                                     &orig_block_len, &ram_bytes) == 1) {
+               if (can_nocow_extent(trans, inode, start, &len, &orig_start,
+                                    &orig_block_len, &ram_bytes) == 1) {
                         if (type == BTRFS_ORDERED_PREALLOC) {
                                 free_extent_map(em);
                                 em = create_pinned_em(inode, start, len,
@@@ -7243,7 -7148,6 +7132,6 @@@ static void btrfs_submit_direct(int rw
   {
         struct btrfs_root *root = BTRFS_I(inode)->root;
         struct btrfs_dio_private *dip;
-       struct bio_vec *bvec = dio_bio->bi_io_vec;
         struct bio *io_bio;
         int skip_sum;
         int write = rw & REQ_WRITE;
@@@ -7265,16 -7169,9 +7153,9 @@@
         }
   
         dip->private = dio_bio->bi_private;
-       io_bio->bi_private = dio_bio->bi_private;
         dip->inode = inode;
         dip->logical_offset = file_offset;
- 
-       dip->bytes = 0;
-       do {
-               dip->bytes += bvec->bv_len;
-               bvec++;
-       } while (bvec <= (dio_bio->bi_io_vec + dio_bio->bi_vcnt - 1));
- 
+       dip->bytes = dio_bio->bi_size;
         dip->disk_bytenr = (u64)dio_bio->bi_sector << 9;
         io_bio->bi_private = dip;
         dip->errors = 0;
@@@ -7373,8 -7270,16 +7254,16 @@@ static ssize_t btrfs_direct_IO(int rw, 
         atomic_inc(&inode->i_dio_count);
         smp_mb__after_atomic_inc();
   
+       /*
+        * The generic stuff only does filemap_write_and_wait_range, which isn't
+        * enough if we've written compressed pages to this area, so we need to
+        * call btrfs_wait_ordered_range to make absolutely sure that any
+        * outstanding dirty pages are on disk.
+        */
+       count = iov_length(iov, nr_segs);
+       btrfs_wait_ordered_range(inode, offset, count);
+ 
         if (rw & WRITE) {
-               count = iov_length(iov, nr_segs);
                 /*
                  * If the write DIO is beyond the EOF, we need update
                  * the isize, but it is protected by i_mutex. So we can
@@@ -7493,8 -7398,7 +7382,8 @@@ static int btrfs_releasepage(struct pag
         return __btrfs_releasepage(page, gfp_flags & GFP_NOFS);
   }
   
- -static void btrfs_invalidatepage(struct page *page, unsigned long offset)
+ +static void btrfs_invalidatepage(struct page *page, unsigned int offset,
+ +                               unsigned int length)
   {
         struct inode *inode = page->mapping->host;
         struct extent_io_tree *tree;
@@@ -7694,16 -7598,12 +7583,12 @@@ static int btrfs_truncate(struct inode 
   {
         struct btrfs_root *root = BTRFS_I(inode)->root;
         struct btrfs_block_rsv *rsv;
-       int ret;
+       int ret = 0;
         int err = 0;
         struct btrfs_trans_handle *trans;
         u64 mask = root->sectorsize - 1;
         u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
   
-       ret = btrfs_truncate_page(inode, inode->i_size, 0, 0);
-       if (ret)
-               return ret;
- 
         btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
         btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
   
@@@ -7961,9 -7861,9 +7846,9 @@@ void btrfs_destroy_inode(struct inode *
          */
         smp_mb();
         if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
-               spin_lock(&root->fs_info->ordered_extent_lock);
+               spin_lock(&root->fs_info->ordered_root_lock);
                 list_del_init(&BTRFS_I(inode)->ordered_operations);
-               spin_unlock(&root->fs_info->ordered_extent_lock);
+               spin_unlock(&root->fs_info->ordered_root_lock);
         }
   
         if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
@@@ -8333,7 -8233,7 +8218,7 @@@ void btrfs_wait_and_free_delalloc_work(
    * some fairly slow code that needs optimization. This walks the list
    * of all the inodes with pending delalloc and forces them to disk.
    */
- int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
+ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
   {
         struct btrfs_inode *binode;
         struct inode *inode;
@@@ -8342,30 -8242,23 +8227,23 @@@
         struct list_head splice;
         int ret = 0;
   
-       if (root->fs_info->sb->s_flags & MS_RDONLY)
-               return -EROFS;
- 
         INIT_LIST_HEAD(&works);
         INIT_LIST_HEAD(&splice);
   
-       spin_lock(&root->fs_info->delalloc_lock);
-       list_splice_init(&root->fs_info->delalloc_inodes, &splice);
+       spin_lock(&root->delalloc_lock);
+       list_splice_init(&root->delalloc_inodes, &splice);
         while (!list_empty(&splice)) {
                 binode = list_entry(splice.next, struct btrfs_inode,
                                     delalloc_inodes);
   
-               list_del_init(&binode->delalloc_inodes);
- 
+               list_move_tail(&binode->delalloc_inodes,
+                              &root->delalloc_inodes);
                 inode = igrab(&binode->vfs_inode);
                 if (!inode) {
-                       clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
-                                 &binode->runtime_flags);
+                       cond_resched_lock(&root->delalloc_lock);
                         continue;
                 }
- 
-               list_add_tail(&binode->delalloc_inodes,
-                             &root->fs_info->delalloc_inodes);
-               spin_unlock(&root->fs_info->delalloc_lock);
+               spin_unlock(&root->delalloc_lock);
   
                 work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
                 if (unlikely(!work)) {
@@@ -8377,16 -8270,39 +8255,39 @@@
                                    &work->work);
   
                 cond_resched();
-               spin_lock(&root->fs_info->delalloc_lock);
+               spin_lock(&root->delalloc_lock);
         }
-       spin_unlock(&root->fs_info->delalloc_lock);
+       spin_unlock(&root->delalloc_lock);
   
         list_for_each_entry_safe(work, next, &works, list) {
                 list_del_init(&work->list);
                 btrfs_wait_and_free_delalloc_work(work);
         }
+       return 0;
+ out:
+       list_for_each_entry_safe(work, next, &works, list) {
+               list_del_init(&work->list);
+               btrfs_wait_and_free_delalloc_work(work);
+       }
+ 
+       if (!list_empty_careful(&splice)) {
+               spin_lock(&root->delalloc_lock);
+               list_splice_tail(&splice, &root->delalloc_inodes);
+               spin_unlock(&root->delalloc_lock);
+       }
+       return ret;
+ }
+ 
+ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
+ {
+       int ret;
   
-       /* the filemap_flush will queue IO into the worker threads, but
+       if (root->fs_info->sb->s_flags & MS_RDONLY)
+               return -EROFS;
+ 
+       ret = __start_delalloc_inodes(root, delay_iput);
+       /*
+        * the filemap_flush will queue IO into the worker threads, but
          * we have to make sure the IO is actually started and that
          * ordered extents get created before we return
          */
@@@ -8398,17 -8314,55 +8299,55 @@@
                     atomic_read(&root->fs_info->async_delalloc_pages) == 0));
         }
         atomic_dec(&root->fs_info->async_submit_draining);
-       return 0;
- out:
-       list_for_each_entry_safe(work, next, &works, list) {
-               list_del_init(&work->list);
-               btrfs_wait_and_free_delalloc_work(work);
+       return ret;
+ }
+ 
+ int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info,
+                                   int delay_iput)
+ {
+       struct btrfs_root *root;
+       struct list_head splice;
+       int ret;
+ 
+       if (fs_info->sb->s_flags & MS_RDONLY)
+               return -EROFS;
+ 
+       INIT_LIST_HEAD(&splice);
+ 
+       spin_lock(&fs_info->delalloc_root_lock);
+       list_splice_init(&fs_info->delalloc_roots, &splice);
+       while (!list_empty(&splice)) {
+               root = list_first_entry(&splice, struct btrfs_root,
+                                       delalloc_root);
+               root = btrfs_grab_fs_root(root);
+               BUG_ON(!root);
+               list_move_tail(&root->delalloc_root,
+                              &fs_info->delalloc_roots);
+               spin_unlock(&fs_info->delalloc_root_lock);
+ 
+               ret = __start_delalloc_inodes(root, delay_iput);
+               btrfs_put_fs_root(root);
+               if (ret)
+                       goto out;
+ 
+               spin_lock(&fs_info->delalloc_root_lock);
         }
+       spin_unlock(&fs_info->delalloc_root_lock);
   
+       atomic_inc(&fs_info->async_submit_draining);
+       while (atomic_read(&fs_info->nr_async_submits) ||
+             atomic_read(&fs_info->async_delalloc_pages)) {
+               wait_event(fs_info->async_submit_wait,
+                  (atomic_read(&fs_info->nr_async_submits) == 0 &&
+                   atomic_read(&fs_info->async_delalloc_pages) == 0));
+       }
+       atomic_dec(&fs_info->async_submit_draining);
+       return 0;
+ out:
         if (!list_empty_careful(&splice)) {
-               spin_lock(&root->fs_info->delalloc_lock);
-               list_splice_tail(&splice, &root->fs_info->delalloc_inodes);
-               spin_unlock(&root->fs_info->delalloc_lock);
+               spin_lock(&fs_info->delalloc_root_lock);
+               list_splice_tail(&splice, &fs_info->delalloc_roots);
+               spin_unlock(&fs_info->delalloc_root_lock);
         }
         return ret;
   }
@@@ -8715,7 -8669,7 +8654,7 @@@ static const struct inode_operations bt
   static const struct file_operations btrfs_dir_file_operations = {
         .llseek         = generic_file_llseek,
         .read           = generic_read_dir,
- -      .readdir        = btrfs_real_readdir,
+ +      .iterate        = btrfs_real_readdir,
         .unlocked_ioctl = btrfs_ioctl,
   #ifdef CONFIG_COMPAT
         .compat_ioctl   = btrfs_ioctl,
diff --combined fs/btrfs/ioctl.c

index cd7e96c73cb71df0589f1866346ab5ff2714eb96,0e17a30f39a2f38798394fc4bca1e73c7ee1875e..238a05545ee2230629fc850191f348b94cadd8cf
--- 1/fs/btrfs/ioctl.c
--- 2/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@@ -555,6 -555,12 +555,12 @@@ static int create_snapshot(struct btrfs
         if (!root->ref_cows)
                 return -EINVAL;
   
+       ret = btrfs_start_delalloc_inodes(root, 0);
+       if (ret)
+               return ret;
+ 
+       btrfs_wait_ordered_extents(root, 0);
+ 
         pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
         if (!pending_snapshot)
                 return -ENOMEM;
@@@ -2354,14 -2360,6 +2360,6 @@@ static long btrfs_ioctl_rm_dev(struct f
         if (ret)
                 return ret;
   
-       if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
-                       1)) {
-               pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
-               mnt_drop_write_file(file);
-               return -EINVAL;
-       }
- 
-       mutex_lock(&root->fs_info->volume_mutex);
         vol_args = memdup_user(arg, sizeof(*vol_args));
         if (IS_ERR(vol_args)) {
                 ret = PTR_ERR(vol_args);
@@@ -2369,12 -2367,20 +2367,20 @@@
         }
   
         vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
-       ret = btrfs_rm_device(root, vol_args->name);
   
-       kfree(vol_args);
- out:
+       if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
+                       1)) {
+               ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
+               goto out;
+       }
+ 
+       mutex_lock(&root->fs_info->volume_mutex);
+       ret = btrfs_rm_device(root, vol_args->name);
         mutex_unlock(&root->fs_info->volume_mutex);
         atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
+ 
+ out:
+       kfree(vol_args);
         mnt_drop_write_file(file);
         return ret;
   }
@@@ -2480,6 -2486,7 +2486,7 @@@ static noinline long btrfs_ioctl_clone(
         int ret;
         u64 len = olen;
         u64 bs = root->fs_info->sb->s_blocksize;
+       int same_inode = 0;
   
         /*
          * TODO:
@@@ -2516,7 -2523,7 +2523,7 @@@
   
         ret = -EINVAL;
         if (src == inode)
-               goto out_fput;
+               same_inode = 1;
   
         /* the src must be open for reading */
         if (!(src_file.file->f_mode & FMODE_READ))
@@@ -2547,12 -2554,16 +2554,16 @@@
         }
         path->reada = 2;
   
-       if (inode < src) {
-               mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
-               mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
+       if (!same_inode) {
+               if (inode < src) {
+                       mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
+                       mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
+               } else {
+                       mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
+                       mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
+               }
         } else {
-               mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
-               mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
+               mutex_lock(&src->i_mutex);
         }
   
         /* determine range to clone */
@@@ -2570,6 -2581,12 +2581,12 @@@
             !IS_ALIGNED(destoff, bs))
                 goto out_unlock;
   
+       /* verify if ranges are overlapped within the same file */
+       if (same_inode) {
+               if (destoff + len > off && destoff < off + len)
+                       goto out_unlock;
+       }
+ 
         if (destoff > inode->i_size) {
                 ret = btrfs_cont_expand(inode, inode->i_size, destoff);
                 if (ret)
@@@ -2846,7 -2863,8 +2863,8 @@@ out
         unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
   out_unlock:
         mutex_unlock(&src->i_mutex);
-       mutex_unlock(&inode->i_mutex);
+       if (!same_inode)
+               mutex_unlock(&inode->i_mutex);
         vfree(buf);
         btrfs_free_path(path);
   out_fput:
@@@ -2951,11 -2969,6 +2969,6 @@@ static long btrfs_ioctl_default_subvol(
                 goto out;
         }
   
-       if (btrfs_root_refs(&new_root->root_item) == 0) {
-               ret = -ENOENT;
-               goto out;
-       }
- 
         path = btrfs_alloc_path();
         if (!path) {
                 ret = -ENOMEM;
@@@ -3719,9 -3732,6 +3732,6 @@@ static long btrfs_ioctl_quota_ctl(struc
                 break;
         }
   
-       if (copy_to_user(arg, sa, sizeof(*sa)))
-               ret = -EFAULT;
- 
         err = btrfs_commit_transaction(trans, root->fs_info->tree_root);
         if (err && !ret)
                 ret = err;
@@@ -3881,7 -3891,7 +3891,7 @@@ drop_write
   
   static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg)
   {
- -      struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+ +      struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
         struct btrfs_ioctl_quota_rescan_args *qsa;
         int ret;
   
@@@ -3914,7 -3924,7 +3924,7 @@@ drop_write
   
   static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)
   {
- -      struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+ +      struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
         struct btrfs_ioctl_quota_rescan_args *qsa;
         int ret = 0;
   
@@@ -3937,6 -3947,16 +3947,16 @@@
         return ret;
   }
   
+ static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg)
+ {
+       struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+ 
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+ 
+       return btrfs_qgroup_wait_for_completion(root->fs_info);
+ }
+ 
   static long btrfs_ioctl_set_received_subvol(struct file *file,
                                             void __user *arg)
   {
@@@ -4020,7 -4040,7 +4040,7 @@@ out
   
   static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg)
   {
- -      struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+ +      struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
         const char *label = root->fs_info->super_copy->label;
         size_t len = strnlen(label, BTRFS_LABEL_SIZE);
         int ret;
@@@ -4039,7 -4059,7 +4059,7 @@@
   
   static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
   {
- -      struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+ +      struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
         struct btrfs_super_block *super_block = root->fs_info->super_copy;
         struct btrfs_trans_handle *trans;
         char label[BTRFS_LABEL_SIZE];
@@@ -4179,6 -4199,8 +4199,8 @@@ long btrfs_ioctl(struct file *file, uns
                 return btrfs_ioctl_quota_rescan(file, argp);
         case BTRFS_IOC_QUOTA_RESCAN_STATUS:
                 return btrfs_ioctl_quota_rescan_status(file, argp);
+       case BTRFS_IOC_QUOTA_RESCAN_WAIT:
+               return btrfs_ioctl_quota_rescan_wait(file, argp);
         case BTRFS_IOC_DEV_REPLACE:
                 return btrfs_ioctl_dev_replace(root, argp);
         case BTRFS_IOC_GET_FSLABEL:
diff --combined fs/btrfs/volumes.c

index 8bffb9174afba04d8375b96f754256b68ff9b4ef,b2d1eacc07c99684f3611b7dc7084b7d2cfdc1d2..78b871753cb61e099abdfca27a0e316c37c329ee
--- 1/fs/btrfs/volumes.c
--- 2/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@@ -982,6 -982,35 +982,35 @@@ out
         return ret;
   }
   
+ static int contains_pending_extent(struct btrfs_trans_handle *trans,
+                                  struct btrfs_device *device,
+                                  u64 *start, u64 len)
+ {
+       struct extent_map *em;
+       int ret = 0;
+ 
+       list_for_each_entry(em, &trans->transaction->pending_chunks, list) {
+               struct map_lookup *map;
+               int i;
+ 
+               map = (struct map_lookup *)em->bdev;
+               for (i = 0; i < map->num_stripes; i++) {
+                       if (map->stripes[i].dev != device)
+                               continue;
+                       if (map->stripes[i].physical >= *start + len ||
+                           map->stripes[i].physical + em->orig_block_len <=
+                           *start)
+                               continue;
+                       *start = map->stripes[i].physical +
+                               em->orig_block_len;
+                       ret = 1;
+               }
+       }
+ 
+       return ret;
+ }
+ 
+ 
   /*
    * find_free_dev_extent - find free space in the specified device
    * @device:   the device which we search the free space in
@@@ -1002,7 -1031,8 +1031,8 @@@
    * But if we don't find suitable free space, it is used to store the size of
    * the max free space.
    */
- int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
+ int find_free_dev_extent(struct btrfs_trans_handle *trans,
+                        struct btrfs_device *device, u64 num_bytes,
                          u64 *start, u64 *len)
   {
         struct btrfs_key key;
@@@ -1026,21 -1056,22 +1056,22 @@@
          */
         search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
   
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+ again:
         max_hole_start = search_start;
         max_hole_size = 0;
         hole_size = 0;
   
         if (search_start >= search_end || device->is_tgtdev_for_dev_replace) {
                 ret = -ENOSPC;
-               goto error;
+               goto out;
         }
   
-       path = btrfs_alloc_path();
-       if (!path) {
-               ret = -ENOMEM;
-               goto error;
-       }
         path->reada = 2;
+       path->search_commit_root = 1;
+       path->skip_locking = 1;
   
         key.objectid = device->devid;
         key.offset = search_start;
@@@ -1081,6 -1112,15 +1112,15 @@@
                 if (key.offset > search_start) {
                         hole_size = key.offset - search_start;
   
+                       /*
+                        * Have to check before we set max_hole_start, otherwise
+                        * we could end up sending back this offset anyway.
+                        */
+                       if (contains_pending_extent(trans, device,
+                                                   &search_start,
+                                                   hole_size))
+                               hole_size = 0;
+ 
                         if (hole_size > max_hole_size) {
                                 max_hole_start = search_start;
                                 max_hole_size = hole_size;
@@@ -1124,6 -1164,11 +1164,11 @@@ next
                 max_hole_size = hole_size;
         }
   
+       if (contains_pending_extent(trans, device, &search_start, hole_size)) {
+               btrfs_release_path(path);
+               goto again;
+       }
+ 
         /* See above. */
         if (hole_size < num_bytes)
                 ret = -ENOSPC;
@@@ -1132,7 -1177,6 +1177,6 @@@
   
   out:
         btrfs_free_path(path);
- error:
         *start = max_hole_start;
         if (len)
                 *len = max_hole_size;
@@@ -1244,47 -1288,22 +1288,22 @@@ out
         return ret;
   }
   
- static noinline int find_next_chunk(struct btrfs_root *root,
-                                   u64 objectid, u64 *offset)
+ static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
   {
-       struct btrfs_path *path;
-       int ret;
-       struct btrfs_key key;
-       struct btrfs_chunk *chunk;
-       struct btrfs_key found_key;
- 
-       path = btrfs_alloc_path();
-       if (!path)
-               return -ENOMEM;
- 
-       key.objectid = objectid;
-       key.offset = (u64)-1;
-       key.type = BTRFS_CHUNK_ITEM_KEY;
- 
-       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-       if (ret < 0)
-               goto error;
- 
-       BUG_ON(ret == 0); /* Corruption */
+       struct extent_map_tree *em_tree;
+       struct extent_map *em;
+       struct rb_node *n;
+       u64 ret = 0;
   
-       ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
-       if (ret) {
-               *offset = 0;
-       } else {
-               btrfs_item_key_to_cpu(path->nodes[0], &found_key,
-                                     path->slots[0]);
-               if (found_key.objectid != objectid)
-                       *offset = 0;
-               else {
-                       chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
-                                              struct btrfs_chunk);
-                       *offset = found_key.offset +
-                               btrfs_chunk_length(path->nodes[0], chunk);
-               }
+       em_tree = &fs_info->mapping_tree.map_tree;
+       read_lock(&em_tree->lock);
+       n = rb_last(&em_tree->map);
+       if (n) {
+               em = rb_entry(n, struct extent_map, rb_node);
+               ret = em->start + em->len;
         }
-       ret = 0;
- error:
-       btrfs_free_path(path);
+       read_unlock(&em_tree->lock);
+ 
         return ret;
   }
   
@@@ -1462,31 -1481,23 +1481,23 @@@ int btrfs_rm_device(struct btrfs_root *
         btrfs_dev_replace_unlock(&root->fs_info->dev_replace);
   
         if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) {
-               printk(KERN_ERR "btrfs: unable to go below four devices "
-                      "on raid10\n");
-               ret = -EINVAL;
+               ret = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET;
                 goto out;
         }
   
         if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2) {
-               printk(KERN_ERR "btrfs: unable to go below two "
-                      "devices on raid1\n");
-               ret = -EINVAL;
+               ret = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET;
                 goto out;
         }
   
         if ((all_avail & BTRFS_BLOCK_GROUP_RAID5) &&
             root->fs_info->fs_devices->rw_devices <= 2) {
-               printk(KERN_ERR "btrfs: unable to go below two "
-                      "devices on raid5\n");
-               ret = -EINVAL;
+               ret = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET;
                 goto out;
         }
         if ((all_avail & BTRFS_BLOCK_GROUP_RAID6) &&
             root->fs_info->fs_devices->rw_devices <= 3) {
-               printk(KERN_ERR "btrfs: unable to go below three "
-                      "devices on raid6\n");
-               ret = -EINVAL;
+               ret = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET;
                 goto out;
         }
   
@@@ -1512,8 -1523,7 +1523,7 @@@
                 bh = NULL;
                 disk_super = NULL;
                 if (!device) {
-                       printk(KERN_ERR "btrfs: no missing devices found to "
-                              "remove\n");
+                       ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
                         goto out;
                 }
         } else {
@@@ -1535,15 -1545,12 +1545,12 @@@
         }
   
         if (device->is_tgtdev_for_dev_replace) {
-               pr_err("btrfs: unable to remove the dev_replace target dev\n");
-               ret = -EINVAL;
+               ret = BTRFS_ERROR_DEV_TGT_REPLACE;
                 goto error_brelse;
         }
   
         if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) {
-               printk(KERN_ERR "btrfs: unable to remove the only writeable "
-                      "device\n");
-               ret = -EINVAL;
+               ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
                 goto error_brelse;
         }
   
@@@ -3295,10 -3302,7 +3302,7 @@@ int btrfs_resume_balance_async(struct b
         }
   
         tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
-       if (IS_ERR(tsk))
-               return PTR_ERR(tsk);
- 
-       return 0;
+       return PTR_RET(tsk);
   }
   
   int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
@@@ -3681,10 -3685,8 +3685,8 @@@ static void check_raid56_incompat_flag(
   }
   
   static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
-                              struct btrfs_root *extent_root,
-                              struct map_lookup **map_ret,
-                              u64 *num_bytes_out, u64 *stripe_size_out,
-                              u64 start, u64 type)
+                              struct btrfs_root *extent_root, u64 start,
+                              u64 type)
   {
         struct btrfs_fs_info *info = extent_root->fs_info;
         struct btrfs_fs_devices *fs_devices = info->fs_devices;
@@@ -3791,7 -3793,7 +3793,7 @@@
                 if (total_avail == 0)
                         continue;
   
-               ret = find_free_dev_extent(device,
+               ret = find_free_dev_extent(trans, device,
                                            max_stripe_size * dev_stripes,
                                            &dev_offset, &max_avail);
                 if (ret && ret != -ENOSPC)
@@@ -3903,12 -3905,8 +3905,8 @@@
         map->type = type;
         map->sub_stripes = sub_stripes;
   
-       *map_ret = map;
         num_bytes = stripe_size * data_stripes;
   
-       *stripe_size_out = stripe_size;
-       *num_bytes_out = num_bytes;
- 
         trace_btrfs_chunk_alloc(info->chunk_root, map, start, num_bytes);
   
         em = alloc_extent_map();
@@@ -3921,38 -3919,26 +3919,26 @@@
         em->len = num_bytes;
         em->block_start = 0;
         em->block_len = em->len;
+       em->orig_block_len = stripe_size;
   
         em_tree = &extent_root->fs_info->mapping_tree.map_tree;
         write_lock(&em_tree->lock);
         ret = add_extent_mapping(em_tree, em, 0);
+       if (!ret) {
+               list_add_tail(&em->list, &trans->transaction->pending_chunks);
+               atomic_inc(&em->refs);
+       }
         write_unlock(&em_tree->lock);
         if (ret) {
                 free_extent_map(em);
                 goto error;
         }
   
-       for (i = 0; i < map->num_stripes; ++i) {
-               struct btrfs_device *device;
-               u64 dev_offset;
- 
-               device = map->stripes[i].dev;
-               dev_offset = map->stripes[i].physical;
- 
-               ret = btrfs_alloc_dev_extent(trans, device,
-                               info->chunk_root->root_key.objectid,
-                               BTRFS_FIRST_CHUNK_TREE_OBJECTID,
-                               start, dev_offset, stripe_size);
-               if (ret)
-                       goto error_dev_extent;
-       }
- 
         ret = btrfs_make_block_group(trans, extent_root, 0, type,
                                      BTRFS_FIRST_CHUNK_TREE_OBJECTID,
                                      start, num_bytes);
-       if (ret) {
-               i = map->num_stripes - 1;
-               goto error_dev_extent;
-       }
+       if (ret)
+               goto error_del_extent;
   
         free_extent_map(em);
         check_raid56_incompat_flag(extent_root->fs_info, type);
@@@ -3960,18 -3946,7 +3946,7 @@@
         kfree(devices_info);
         return 0;
   
- error_dev_extent:
-       for (; i >= 0; i--) {
-               struct btrfs_device *device;
-               int err;
- 
-               device = map->stripes[i].dev;
-               err = btrfs_free_dev_extent(trans, device, start);
-               if (err) {
-                       btrfs_abort_transaction(trans, extent_root, err);
-                       break;
-               }
-       }
+ error_del_extent:
         write_lock(&em_tree->lock);
         remove_extent_mapping(em_tree, em);
         write_unlock(&em_tree->lock);
@@@ -3986,33 -3961,68 +3961,68 @@@ error
         return ret;
   }
   
- static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
+ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *extent_root,
-                               struct map_lookup *map, u64 chunk_offset,
-                               u64 chunk_size, u64 stripe_size)
+                               u64 chunk_offset, u64 chunk_size)
   {
-       u64 dev_offset;
         struct btrfs_key key;
         struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
         struct btrfs_device *device;
         struct btrfs_chunk *chunk;
         struct btrfs_stripe *stripe;
-       size_t item_size = btrfs_chunk_item_size(map->num_stripes);
-       int index = 0;
+       struct extent_map_tree *em_tree;
+       struct extent_map *em;
+       struct map_lookup *map;
+       size_t item_size;
+       u64 dev_offset;
+       u64 stripe_size;
+       int i = 0;
         int ret;
   
+       em_tree = &extent_root->fs_info->mapping_tree.map_tree;
+       read_lock(&em_tree->lock);
+       em = lookup_extent_mapping(em_tree, chunk_offset, chunk_size);
+       read_unlock(&em_tree->lock);
+ 
+       if (!em) {
+               btrfs_crit(extent_root->fs_info, "unable to find logical "
+                          "%Lu len %Lu", chunk_offset, chunk_size);
+               return -EINVAL;
+       }
+ 
+       if (em->start != chunk_offset || em->len != chunk_size) {
+               btrfs_crit(extent_root->fs_info, "found a bad mapping, wanted"
+                         " %Lu-%Lu, found %Lu-%Lu\n", chunk_offset,
+                         chunk_size, em->start, em->len);
+               free_extent_map(em);
+               return -EINVAL;
+       }
+ 
+       map = (struct map_lookup *)em->bdev;
+       item_size = btrfs_chunk_item_size(map->num_stripes);
+       stripe_size = em->orig_block_len;
+ 
         chunk = kzalloc(item_size, GFP_NOFS);
-       if (!chunk)
-               return -ENOMEM;
+       if (!chunk) {
+               ret = -ENOMEM;
+               goto out;
+       }
+ 
+       for (i = 0; i < map->num_stripes; i++) {
+               device = map->stripes[i].dev;
+               dev_offset = map->stripes[i].physical;
   
-       index = 0;
-       while (index < map->num_stripes) {
-               device = map->stripes[index].dev;
                 device->bytes_used += stripe_size;
                 ret = btrfs_update_device(trans, device);
                 if (ret)
-                       goto out_free;
-               index++;
+                       goto out;
+               ret = btrfs_alloc_dev_extent(trans, device,
+                                            chunk_root->root_key.objectid,
+                                            BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+                                            chunk_offset, dev_offset,
+                                            stripe_size);
+               if (ret)
+                       goto out;
         }
   
         spin_lock(&extent_root->fs_info->free_chunk_lock);
@@@ -4020,17 -4030,15 +4030,15 @@@
                                                    map->num_stripes);
         spin_unlock(&extent_root->fs_info->free_chunk_lock);
   
-       index = 0;
         stripe = &chunk->stripe;
-       while (index < map->num_stripes) {
-               device = map->stripes[index].dev;
-               dev_offset = map->stripes[index].physical;
+       for (i = 0; i < map->num_stripes; i++) {
+               device = map->stripes[i].dev;
+               dev_offset = map->stripes[i].physical;
   
                 btrfs_set_stack_stripe_devid(stripe, device->devid);
                 btrfs_set_stack_stripe_offset(stripe, dev_offset);
                 memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
                 stripe++;
-               index++;
         }
   
         btrfs_set_stack_chunk_length(chunk, chunk_size);
@@@ -4048,7 -4056,6 +4056,6 @@@
         key.offset = chunk_offset;
   
         ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
- 
         if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
                 /*
                  * TODO: Cleanup of inserted chunk root in case of
@@@ -4058,8 -4065,9 +4065,9 @@@
                                              item_size);
         }
   
- out_free:
+ out:
         kfree(chunk);
+       free_extent_map(em);
         return ret;
   }
   
@@@ -4074,27 -4082,9 +4082,9 @@@ int btrfs_alloc_chunk(struct btrfs_tran
                       struct btrfs_root *extent_root, u64 type)
   {
         u64 chunk_offset;
-       u64 chunk_size;
-       u64 stripe_size;
-       struct map_lookup *map;
-       struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
-       int ret;
- 
-       ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
-                             &chunk_offset);
-       if (ret)
-               return ret;
   
-       ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
-                                 &stripe_size, chunk_offset, type);
-       if (ret)
-               return ret;
- 
-       ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
-                                  chunk_size, stripe_size);
-       if (ret)
-               return ret;
-       return 0;
+       chunk_offset = find_next_chunk(extent_root->fs_info);
+       return __btrfs_alloc_chunk(trans, extent_root, chunk_offset, type);
   }
   
   static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
@@@ -4103,66 -4093,31 +4093,31 @@@
   {
         u64 chunk_offset;
         u64 sys_chunk_offset;
-       u64 chunk_size;
-       u64 sys_chunk_size;
-       u64 stripe_size;
-       u64 sys_stripe_size;
         u64 alloc_profile;
-       struct map_lookup *map;
-       struct map_lookup *sys_map;
         struct btrfs_fs_info *fs_info = root->fs_info;
         struct btrfs_root *extent_root = fs_info->extent_root;
         int ret;
   
-       ret = find_next_chunk(fs_info->chunk_root,
-                             BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset);
-       if (ret)
-               return ret;
- 
+       chunk_offset = find_next_chunk(fs_info);
         alloc_profile = btrfs_get_alloc_profile(extent_root, 0);
-       ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
-                                 &stripe_size, chunk_offset, alloc_profile);
+       ret = __btrfs_alloc_chunk(trans, extent_root, chunk_offset,
+                                 alloc_profile);
         if (ret)
                 return ret;
   
-       sys_chunk_offset = chunk_offset + chunk_size;
- 
+       sys_chunk_offset = find_next_chunk(root->fs_info);
         alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0);
-       ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map,
-                                 &sys_chunk_size, &sys_stripe_size,
-                                 sys_chunk_offset, alloc_profile);
+       ret = __btrfs_alloc_chunk(trans, extent_root, sys_chunk_offset,
+                                 alloc_profile);
         if (ret) {
                 btrfs_abort_transaction(trans, root, ret);
                 goto out;
         }
   
         ret = btrfs_add_device(trans, fs_info->chunk_root, device);
-       if (ret) {
-               btrfs_abort_transaction(trans, root, ret);
-               goto out;
-       }
- 
-       /*
-        * Modifying chunk tree needs allocating new blocks from both
-        * system block group and metadata block group. So we only can
-        * do operations require modifying the chunk tree after both
-        * block groups were created.
-        */
-       ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
-                                  chunk_size, stripe_size);
-       if (ret) {
-               btrfs_abort_transaction(trans, root, ret);
-               goto out;
-       }
- 
-       ret = __finish_chunk_alloc(trans, extent_root, sys_map,
-                                  sys_chunk_offset, sys_chunk_size,
-                                  sys_stripe_size);
         if (ret)
                 btrfs_abort_transaction(trans, root, ret);
- 
   out:
- 
         return ret;
   }
   
@@@ -4435,9 -4390,6 +4390,6 @@@ static int __btrfs_map_block(struct btr
         map = (struct map_lookup *)em->bdev;
         offset = logical - em->start;
   
-       if (mirror_num > map->num_stripes)
-               mirror_num = 0;
- 
         stripe_len = map->stripe_len;
         stripe_nr = offset;
         /*
@@@ -5164,7 -5116,7 +5116,7 @@@ static int bio_size_ok(struct block_dev
         }
   
         prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
- -      if ((bio->bi_size >> 9) > max_sectors)
+ +      if (bio_sectors(bio) > max_sectors)
                 return 0;
   
         if (!q->merge_bvec_fn)
@@@ -5367,7 -5319,6 +5319,6 @@@ static struct btrfs_device *add_missing
                 return NULL;
         list_add(&device->dev_list,
                  &fs_devices->devices);
-       device->dev_root = root->fs_info->dev_root;
         device->devid = devid;
         device->work.func = pending_bios_fn;
         device->fs_devices = fs_devices;
@@@ -5593,7 -5544,6 +5544,6 @@@ static int read_one_dev(struct btrfs_ro
         }
   
         fill_device_from_item(leaf, dev_item, device);
-       device->dev_root = root->fs_info->dev_root;
         device->in_fs_metadata = 1;
         if (device->writeable && !device->is_tgtdev_for_dev_replace) {
                 device->fs_devices->total_rw_bytes += device->total_bytes;
@@@ -5751,6 -5701,17 +5701,17 @@@ error
         return ret;
   }
   
+ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
+ {
+       struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+       struct btrfs_device *device;
+ 
+       mutex_lock(&fs_devices->device_list_mutex);
+       list_for_each_entry(device, &fs_devices->devices, dev_list)
+               device->dev_root = fs_info->dev_root;
+       mutex_unlock(&fs_devices->device_list_mutex);
+ }
+ 
   static void __btrfs_reset_dev_stats(struct btrfs_device *dev)
   {
         int i;
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 9 Jul 2013 19:33:09 +0000 (12:33 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 9 Jul 2013 19:33:09 +0000 (12:33 -0700)
		1	2
fs/btrfs/delayed-inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/disk-io.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/extent_io.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/file.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/free-space-cache.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/ioctl.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/volumes.c	patch \|	diff1 \|	diff2 \|	blob \| history