]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blobdiff - fs/btrfs/disk-io.c
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...
[mirror_ubuntu-artful-kernel.git] / fs / btrfs / disk-io.c
index 22a0439e5a86316196f07c26896e823f5cf756a1..a8f652dc940bd85148dad48c11d2d893aefe32ba 100644 (file)
@@ -45,6 +45,7 @@
 #include "inode-map.h"
 #include "check-integrity.h"
 #include "rcu-string.h"
+#include "dev-replace.h"
 
 #ifdef CONFIG_X86
 #include <asm/cpufeature.h>
@@ -387,7 +388,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
                if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
                        break;
 
-               num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
+               num_copies = btrfs_num_copies(root->fs_info,
                                              eb->start, eb->len);
                if (num_copies == 1)
                        break;
@@ -852,11 +853,16 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
                                 int mirror_num, unsigned long bio_flags,
                                 u64 bio_offset)
 {
+       int ret;
+
        /*
         * when we're called for a write, we're already in the async
         * submission context.  Just jump into btrfs_map_bio
         */
-       return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
+       ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
+       if (ret)
+               bio_endio(bio, ret);
+       return ret;
 }
 
 static int check_async_write(struct inode *inode, unsigned long bio_flags)
@@ -878,7 +884,6 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
        int ret;
 
        if (!(rw & REQ_WRITE)) {
-
                /*
                 * called for a read, do the setup so that checksum validation
                 * can happen in the async kernel threads
@@ -886,26 +891,32 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
                ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
                                          bio, 1);
                if (ret)
-                       return ret;
-               return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
-                                    mirror_num, 0);
+                       goto out_w_error;
+               ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
+                                   mirror_num, 0);
        } else if (!async) {
                ret = btree_csum_one_bio(bio);
                if (ret)
-                       return ret;
-               return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
-                                    mirror_num, 0);
+                       goto out_w_error;
+               ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
+                                   mirror_num, 0);
+       } else {
+               /*
+                * kthread helpers are used to submit writes so that
+                * checksumming can happen in parallel across all CPUs
+                */
+               ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
+                                         inode, rw, bio, mirror_num, 0,
+                                         bio_offset,
+                                         __btree_submit_bio_start,
+                                         __btree_submit_bio_done);
        }
 
-       /*
-        * kthread helpers are used to submit writes so that checksumming
-        * can happen in parallel across all CPUs
-        */
-       return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
-                                  inode, rw, bio, mirror_num, 0,
-                                  bio_offset,
-                                  __btree_submit_bio_start,
-                                  __btree_submit_bio_done);
+       if (ret) {
+out_w_error:
+               bio_endio(bio, ret);
+       }
+       return ret;
 }
 
 #ifdef CONFIG_MIGRATION
@@ -990,6 +1001,7 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)
 
 static int btree_set_page_dirty(struct page *page)
 {
+#ifdef DEBUG
        struct extent_buffer *eb;
 
        BUG_ON(!PagePrivate(page));
@@ -998,6 +1010,7 @@ static int btree_set_page_dirty(struct page *page)
        BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
        BUG_ON(!atomic_read(&eb->refs));
        btrfs_assert_tree_locked(eb);
+#endif
        return __set_page_dirty_nobuffers(page);
 }
 
@@ -1129,11 +1142,11 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                                          root->fs_info->dirty_metadata_bytes);
                        }
                        spin_unlock(&root->fs_info->delalloc_lock);
-               }
 
-               /* ugh, clear_extent_buffer_dirty needs to lock the page */
-               btrfs_set_lock_blocking(buf);
-               clear_extent_buffer_dirty(buf);
+                       /* ugh, clear_extent_buffer_dirty needs to lock the page */
+                       btrfs_set_lock_blocking(buf);
+                       clear_extent_buffer_dirty(buf);
+               }
        }
 }
 
@@ -1193,7 +1206,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        root->root_key.objectid = objectid;
        root->anon_dev = 0;
 
-       spin_lock_init(&root->root_times_lock);
+       spin_lock_init(&root->root_item_lock);
 }
 
 static int __must_check find_and_setup_root(struct btrfs_root *tree_root,
@@ -2131,6 +2144,11 @@ int open_ctree(struct super_block *sb,
        init_rwsem(&fs_info->extent_commit_sem);
        init_rwsem(&fs_info->cleanup_work_sem);
        init_rwsem(&fs_info->subvol_sem);
+       fs_info->dev_replace.lock_owner = 0;
+       atomic_set(&fs_info->dev_replace.nesting_level, 0);
+       mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
+       mutex_init(&fs_info->dev_replace.lock_management_lock);
+       mutex_init(&fs_info->dev_replace.lock);
 
        spin_lock_init(&fs_info->qgroup_lock);
        fs_info->qgroup_tree = RB_ROOT;
@@ -2279,6 +2297,10 @@ int open_ctree(struct super_block *sb,
                           fs_info->thread_pool_size,
                           &fs_info->generic_worker);
 
+       btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc",
+                          fs_info->thread_pool_size,
+                          &fs_info->generic_worker);
+
        btrfs_init_workers(&fs_info->submit_workers, "submit",
                           min_t(u64, fs_devices->num_devices,
                           fs_info->thread_pool_size),
@@ -2350,6 +2372,7 @@ int open_ctree(struct super_block *sb,
        ret |= btrfs_start_workers(&fs_info->delayed_workers);
        ret |= btrfs_start_workers(&fs_info->caching_workers);
        ret |= btrfs_start_workers(&fs_info->readahead_workers);
+       ret |= btrfs_start_workers(&fs_info->flush_workers);
        if (ret) {
                err = -ENOMEM;
                goto fail_sb_buffer;
@@ -2418,7 +2441,11 @@ int open_ctree(struct super_block *sb,
                goto fail_tree_roots;
        }
 
-       btrfs_close_extra_devices(fs_devices);
+       /*
+        * keep the device that is marked to be the target device for the
+        * dev_replace procedure
+        */
+       btrfs_close_extra_devices(fs_info, fs_devices, 0);
 
        if (!fs_devices->latest_bdev) {
                printk(KERN_CRIT "btrfs: failed to read devices on %s\n",
@@ -2490,6 +2517,14 @@ retry_root_backup:
                goto fail_block_groups;
        }
 
+       ret = btrfs_init_dev_replace(fs_info);
+       if (ret) {
+               pr_err("btrfs: failed to init dev_replace: %d\n", ret);
+               goto fail_block_groups;
+       }
+
+       btrfs_close_extra_devices(fs_info, fs_devices, 1);
+
        ret = btrfs_init_space_info(fs_info);
        if (ret) {
                printk(KERN_ERR "Failed to initial space info: %d\n", ret);
@@ -2503,6 +2538,13 @@ retry_root_backup:
        }
        fs_info->num_tolerated_disk_barrier_failures =
                btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
+       if (fs_info->fs_devices->missing_devices >
+            fs_info->num_tolerated_disk_barrier_failures &&
+           !(sb->s_flags & MS_RDONLY)) {
+               printk(KERN_WARNING
+                      "Btrfs: too many missing devices, writeable mount is not allowed\n");
+               goto fail_block_groups;
+       }
 
        fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
                                               "btrfs-cleaner");
@@ -2631,6 +2673,13 @@ retry_root_backup:
                return ret;
        }
 
+       ret = btrfs_resume_dev_replace_async(fs_info);
+       if (ret) {
+               pr_warn("btrfs: failed to resume dev_replace\n");
+               close_ctree(tree_root);
+               return ret;
+       }
+
        return 0;
 
 fail_qgroup:
@@ -2667,6 +2716,7 @@ fail_sb_buffer:
        btrfs_stop_workers(&fs_info->submit_workers);
        btrfs_stop_workers(&fs_info->delayed_workers);
        btrfs_stop_workers(&fs_info->caching_workers);
+       btrfs_stop_workers(&fs_info->flush_workers);
 fail_alloc:
 fail_iput:
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -3270,16 +3320,18 @@ int close_ctree(struct btrfs_root *root)
        smp_mb();
 
        /* pause restriper - we want to resume on mount */
-       btrfs_pause_balance(root->fs_info);
+       btrfs_pause_balance(fs_info);
 
-       btrfs_scrub_cancel(root);
+       btrfs_dev_replace_suspend_for_unmount(fs_info);
+
+       btrfs_scrub_cancel(fs_info);
 
        /* wait for any defraggers to finish */
        wait_event(fs_info->transaction_wait,
                   (atomic_read(&fs_info->defrag_running) == 0));
 
        /* clear out the rbtree of defraggable inodes */
-       btrfs_run_defrag_inodes(fs_info);
+       btrfs_cleanup_defrag_inodes(fs_info);
 
        if (!(fs_info->sb->s_flags & MS_RDONLY)) {
                ret = btrfs_commit_super(root);
@@ -3339,6 +3391,7 @@ int close_ctree(struct btrfs_root *root)
        btrfs_stop_workers(&fs_info->delayed_workers);
        btrfs_stop_workers(&fs_info->caching_workers);
        btrfs_stop_workers(&fs_info->readahead_workers);
+       btrfs_stop_workers(&fs_info->flush_workers);
 
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
        if (btrfs_test_opt(root, CHECK_INTEGRITY))
@@ -3383,14 +3436,12 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
        int was_dirty;
 
        btrfs_assert_tree_locked(buf);
-       if (transid != root->fs_info->generation) {
-               printk(KERN_CRIT "btrfs transid mismatch buffer %llu, "
+       if (transid != root->fs_info->generation)
+               WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, "
                       "found %llu running %llu\n",
                        (unsigned long long)buf->start,
                        (unsigned long long)transid,
                        (unsigned long long)root->fs_info->generation);
-               WARN_ON(1);
-       }
        was_dirty = set_extent_buffer_dirty(buf);
        if (!was_dirty) {
                spin_lock(&root->fs_info->delalloc_lock);
@@ -3399,7 +3450,8 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
        }
 }
 
-void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
+static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
+                                       int flush_delayed)
 {
        /*
         * looks as though older kernels can get into trouble with
@@ -3411,7 +3463,8 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
        if (current->flags & PF_MEMALLOC)
                return;
 
-       btrfs_balance_delayed_items(root);
+       if (flush_delayed)
+               btrfs_balance_delayed_items(root);
 
        num_dirty = root->fs_info->dirty_metadata_bytes;
 
@@ -3422,25 +3475,14 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
        return;
 }
 
-void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
+void btrfs_btree_balance_dirty(struct btrfs_root *root)
 {
-       /*
-        * looks as though older kernels can get into trouble with
-        * this code, they end up stuck in balance_dirty_pages forever
-        */
-       u64 num_dirty;
-       unsigned long thresh = 32 * 1024 * 1024;
-
-       if (current->flags & PF_MEMALLOC)
-               return;
-
-       num_dirty = root->fs_info->dirty_metadata_bytes;
+       __btrfs_btree_balance_dirty(root, 1);
+}
 
-       if (num_dirty > thresh) {
-               balance_dirty_pages_ratelimited(
-                                  root->fs_info->btree_inode->i_mapping);
-       }
-       return;
+void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root)
+{
+       __btrfs_btree_balance_dirty(root, 0);
 }
 
 int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)