git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blobdiff - fs/btrfs/disk-io.c
Btrfs: fix use-after-free bug during umount
[mirror_ubuntu-zesty-kernel.git] / fs / btrfs / disk-io.c
index 92c44ed78de11012b19b462b6e339f34a9f5c915..7c66c2314c14021e5488f99ebce182372798bebc 100644 (file)
@@ -70,6 +70,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
                                        int mark);
 static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
                                       struct extent_io_tree *pinned_extents);
+static int btrfs_cleanup_transaction(struct btrfs_root *root);
+static void btrfs_error_commit_super(struct btrfs_root *root);
 
 /*
  * end_io_wq structs are used to do processing in task context when an IO is
@@ -150,7 +152,7 @@ static struct btrfs_lockdep_keyset {
        { .id = BTRFS_DEV_TREE_OBJECTID,        .name_stem = "dev"      },
        { .id = BTRFS_FS_TREE_OBJECTID,         .name_stem = "fs"       },
        { .id = BTRFS_CSUM_TREE_OBJECTID,       .name_stem = "csum"     },
-       { .id = BTRFS_ORPHAN_OBJECTID,          .name_stem = "orphan"   },
+       { .id = BTRFS_QUOTA_TREE_OBJECTID,      .name_stem = "quota"    },
        { .id = BTRFS_TREE_LOG_OBJECTID,        .name_stem = "log"      },
        { .id = BTRFS_TREE_RELOC_OBJECTID,      .name_stem = "treloc"   },
        { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc"   },
@@ -354,6 +356,49 @@ out:
        return ret;
 }
 
+/*
+ * Return 0 if the superblock checksum is fine for its csum type, 1 on mismatch
+ * (tolerated if generation < 10) or unsupported type. Pass the raw superblock.
+ */
+static int btrfs_check_super_csum(char *raw_disk_sb)
+{
+       struct btrfs_super_block *disk_sb =
+               (struct btrfs_super_block *)raw_disk_sb;
+       u16 csum_type = btrfs_super_csum_type(disk_sb);
+       int ret = 0;
+
+       if (csum_type == BTRFS_CSUM_TYPE_CRC32) {
+               u32 crc = ~(u32)0;
+               const int csum_size = sizeof(crc);
+               char result[csum_size];
+
+               /*
+                * The super_block structure does not span the whole
+                * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space
+                * is filled with zeros and is included in the checksum.
+                */
+               crc = btrfs_csum_data(raw_disk_sb + BTRFS_CSUM_SIZE,
+                               crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
+               btrfs_csum_final(crc, result);
+
+               if (memcmp(raw_disk_sb, result, csum_size))
+                       ret = 1;
+
+               if (ret && btrfs_super_generation(disk_sb) < 10) {
+                       printk(KERN_WARNING "btrfs: super block crcs don't match, older mkfs detected\n");
+                       ret = 0;
+               }
+       }
+
+       if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) {
+               printk(KERN_ERR "btrfs: unsupported checksum algorithm %u\n",
+                               csum_type);
+               ret = 1;
+       }
+
+       return ret;
+}
+
 /*
  * helper to read a given tree block, doing retries as required when
  * the checksums don't match and we have alternate mirrors to try.
@@ -531,41 +576,6 @@ static noinline int check_leaf(struct btrfs_root *root,
        return 0;
 }
 
-struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree,
-                                      struct page *page, int max_walk)
-{
-       struct extent_buffer *eb;
-       u64 start = page_offset(page);
-       u64 target = start;
-       u64 min_start;
-
-       if (start < max_walk)
-               min_start = 0;
-       else
-               min_start = start - max_walk;
-
-       while (start >= min_start) {
-               eb = find_extent_buffer(tree, start, 0);
-               if (eb) {
-                       /*
-                        * we found an extent buffer and it contains our page
-                        * horray!
-                        */
-                       if (eb->start <= target &&
-                           eb->start + eb->len > target)
-                               return eb;
-
-                       /* we found an extent buffer that wasn't for us */
-                       free_extent_buffer(eb);
-                       return NULL;
-               }
-               if (start == 0)
-                       break;
-               start -= PAGE_CACHE_SIZE;
-       }
-       return NULL;
-}
-
 static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
                               struct extent_state *state, int mirror)
 {
@@ -999,14 +1009,8 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags)
 {
        if (PageWriteback(page) || PageDirty(page))
                return 0;
-       /*
-        * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing
-        * slab allocation from alloc_extent_state down the callchain where
-        * it'd hit a BUG_ON as those flags are not allowed.
-        */
-       gfp_flags &= ~GFP_SLAB_BUG_MASK;
 
-       return try_release_extent_buffer(page, gfp_flags);
+       return try_release_extent_buffer(page);
 }
 
 static void btree_invalidatepage(struct page *page, unsigned long offset)
@@ -1509,7 +1513,6 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
        }
 
        root->commit_root = btrfs_root_node(root);
-       BUG_ON(!root->node); /* -ENOMEM */
 out:
        if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
                root->ref_cows = 1;
@@ -1976,6 +1979,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
        btrfs_stop_workers(&fs_info->caching_workers);
        btrfs_stop_workers(&fs_info->readahead_workers);
        btrfs_stop_workers(&fs_info->flush_workers);
+       btrfs_stop_workers(&fs_info->qgroup_rescan_workers);
 }
 
 /* helper to cleanup tree roots */
@@ -1983,30 +1987,33 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
 {
        free_extent_buffer(info->tree_root->node);
        free_extent_buffer(info->tree_root->commit_root);
-       free_extent_buffer(info->dev_root->node);
-       free_extent_buffer(info->dev_root->commit_root);
-       free_extent_buffer(info->extent_root->node);
-       free_extent_buffer(info->extent_root->commit_root);
-       free_extent_buffer(info->csum_root->node);
-       free_extent_buffer(info->csum_root->commit_root);
-       if (info->quota_root) {
-               free_extent_buffer(info->quota_root->node);
-               free_extent_buffer(info->quota_root->commit_root);
-       }
-
        info->tree_root->node = NULL;
        info->tree_root->commit_root = NULL;
-       info->dev_root->node = NULL;
-       info->dev_root->commit_root = NULL;
-       info->extent_root->node = NULL;
-       info->extent_root->commit_root = NULL;
-       info->csum_root->node = NULL;
-       info->csum_root->commit_root = NULL;
+
+       if (info->dev_root) {
+               free_extent_buffer(info->dev_root->node);
+               free_extent_buffer(info->dev_root->commit_root);
+               info->dev_root->node = NULL;
+               info->dev_root->commit_root = NULL;
+       }
+       if (info->extent_root) {
+               free_extent_buffer(info->extent_root->node);
+               free_extent_buffer(info->extent_root->commit_root);
+               info->extent_root->node = NULL;
+               info->extent_root->commit_root = NULL;
+       }
+       if (info->csum_root) {
+               free_extent_buffer(info->csum_root->node);
+               free_extent_buffer(info->csum_root->commit_root);
+               info->csum_root->node = NULL;
+               info->csum_root->commit_root = NULL;
+       }
        if (info->quota_root) {
+               free_extent_buffer(info->quota_root->node);
+               free_extent_buffer(info->quota_root->commit_root);
                info->quota_root->node = NULL;
                info->quota_root->commit_root = NULL;
        }
-
        if (chunk_root) {
                free_extent_buffer(info->chunk_root->node);
                free_extent_buffer(info->chunk_root->commit_root);
@@ -2267,6 +2274,7 @@ int open_ctree(struct super_block *sb,
        fs_info->qgroup_seq = 1;
        fs_info->quota_enabled = 0;
        fs_info->pending_quota_state = 0;
+       mutex_init(&fs_info->qgroup_rescan_lock);
 
        btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
        btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -2286,12 +2294,31 @@ int open_ctree(struct super_block *sb,
                     fs_info, BTRFS_ROOT_TREE_OBJECTID);
 
        invalidate_bdev(fs_devices->latest_bdev);
+
+       /*
+        * Read super block and check the signature bytes only
+        */
        bh = btrfs_read_dev_super(fs_devices->latest_bdev);
        if (!bh) {
                err = -EINVAL;
                goto fail_alloc;
        }
 
+       /*
+        * We want to check superblock checksum, the type is stored inside.
+        * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
+        */
+       if (btrfs_check_super_csum(bh->b_data)) {
+               printk(KERN_ERR "btrfs: superblock checksum mismatch\n");
+               err = -EINVAL;
+               goto fail_alloc;
+       }
+
+       /*
+        * super_copy is zeroed at allocation time and we never touch the
+        * following bytes up to INFO_SIZE, the checksum is calculated from
+        * the whole block of INFO_SIZE
+        */
        memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy));
        memcpy(fs_info->super_for_commit, fs_info->super_copy,
               sizeof(*fs_info->super_for_commit));
@@ -2299,6 +2326,13 @@ int open_ctree(struct super_block *sb,
 
        memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE);
 
+       ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
+       if (ret) {
+               printk(KERN_ERR "btrfs: superblock contains fatal errors\n");
+               err = -EINVAL;
+               goto fail_alloc;
+       }
+
        disk_super = fs_info->super_copy;
        if (!btrfs_super_root(disk_super))
                goto fail_alloc;
@@ -2307,13 +2341,6 @@ int open_ctree(struct super_block *sb,
        if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR)
                set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
 
-       ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
-       if (ret) {
-               printk(KERN_ERR "btrfs: superblock contains fatal errors\n");
-               err = ret;
-               goto fail_alloc;
-       }
-
        /*
         * run through our array of backup supers and setup
         * our ring pointer to the oldest one
@@ -2476,6 +2503,8 @@ int open_ctree(struct super_block *sb,
        btrfs_init_workers(&fs_info->readahead_workers, "readahead",
                           fs_info->thread_pool_size,
                           &fs_info->generic_worker);
+       btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1,
+                          &fs_info->generic_worker);
 
        /*
         * endios are largely parallel and should have a very
@@ -2510,6 +2539,7 @@ int open_ctree(struct super_block *sb,
        ret |= btrfs_start_workers(&fs_info->caching_workers);
        ret |= btrfs_start_workers(&fs_info->readahead_workers);
        ret |= btrfs_start_workers(&fs_info->flush_workers);
+       ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers);
        if (ret) {
                err = -ENOMEM;
                goto fail_sb_buffer;
@@ -2829,8 +2859,8 @@ fail_qgroup:
        btrfs_free_qgroup_config(fs_info);
 fail_trans_kthread:
        kthread_stop(fs_info->transaction_kthread);
-       del_fs_roots(fs_info);
        btrfs_cleanup_transaction(fs_info->tree_root);
+       del_fs_roots(fs_info);
 fail_cleaner:
        kthread_stop(fs_info->cleaner_kthread);
 
@@ -2981,7 +3011,10 @@ static int write_dev_supers(struct btrfs_device *device,
                if (wait) {
                        bh = __find_get_block(device->bdev, bytenr / 4096,
                                              BTRFS_SUPER_INFO_SIZE);
-                       BUG_ON(!bh);
+                       if (!bh) {
+                               errors++;
+                               continue;
+                       }
                        wait_on_buffer(bh);
                        if (!buffer_uptodate(bh))
                                errors++;
@@ -3008,6 +3041,13 @@ static int write_dev_supers(struct btrfs_device *device,
                         */
                        bh = __getblk(device->bdev, bytenr / 4096,
                                      BTRFS_SUPER_INFO_SIZE);
+                       if (!bh) {
+                               printk(KERN_ERR "btrfs: couldn't get super "
+                                      "buffer head for bytenr %Lu\n", bytenr);
+                               errors++;
+                               continue;
+                       }
+
                        memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
 
                        /* one reference for submit_bh */
@@ -3090,7 +3130,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
         * caller
         */
        device->flush_bio = NULL;
-       bio = bio_alloc(GFP_NOFS, 0);
+       bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
        if (!bio)
                return -ENOMEM;
 
@@ -3230,7 +3270,7 @@ int btrfs_calc_num_tolerated_disk_barrier_failures(
        return num_tolerated_disk_barrier_failures;
 }
 
-int write_all_supers(struct btrfs_root *root, int max_mirrors)
+static int write_all_supers(struct btrfs_root *root, int max_mirrors)
 {
        struct list_head *head;
        struct btrfs_device *dev;
@@ -3472,10 +3512,10 @@ int close_ctree(struct btrfs_root *root)
                       percpu_counter_sum(&fs_info->delalloc_bytes));
        }
 
-       free_root_pointers(fs_info, 1);
-
        btrfs_free_block_groups(fs_info);
 
+       free_root_pointers(fs_info, 1);
+
        del_fs_roots(fs_info);
 
        iput(fs_info->btree_inode);
@@ -3585,18 +3625,13 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
 static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
                              int read_only)
 {
-       if (btrfs_super_csum_type(fs_info->super_copy) >= ARRAY_SIZE(btrfs_csum_sizes)) {
-               printk(KERN_ERR "btrfs: unsupported checksum algorithm\n");
-               return -EINVAL;
-       }
-
-       if (read_only)
-               return 0;
-
+       /*
+        * Placeholder for checks
+        */
        return 0;
 }
 
-void btrfs_error_commit_super(struct btrfs_root *root)
+static void btrfs_error_commit_super(struct btrfs_root *root)
 {
        mutex_lock(&root->fs_info->cleaner_mutex);
        btrfs_run_delayed_iputs(root);
@@ -3626,8 +3661,11 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
                                         ordered_operations);
 
                list_del_init(&btrfs_inode->ordered_operations);
+               spin_unlock(&root->fs_info->ordered_extent_lock);
 
                btrfs_invalidate_inodes(btrfs_inode->root);
+
+               spin_lock(&root->fs_info->ordered_extent_lock);
        }
 
        spin_unlock(&root->fs_info->ordered_extent_lock);
@@ -3749,8 +3787,11 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
                list_del_init(&btrfs_inode->delalloc_inodes);
                clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
                          &btrfs_inode->runtime_flags);
+               spin_unlock(&root->fs_info->delalloc_lock);
 
                btrfs_invalidate_inodes(btrfs_inode->root);
+
+               spin_lock(&root->fs_info->delalloc_lock);
        }
 
        spin_unlock(&root->fs_info->delalloc_lock);
@@ -3775,7 +3816,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
                while (start <= end) {
                        eb = btrfs_find_tree_block(root, start,
                                                   root->leafsize);
-                       start += eb->len;
+                       start += root->leafsize;
                        if (!eb)
                                continue;
                        wait_on_extent_buffer_writeback(eb);
@@ -3864,7 +3905,7 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
        */
 }
 
-int btrfs_cleanup_transaction(struct btrfs_root *root)
+static int btrfs_cleanup_transaction(struct btrfs_root *root)
 {
        struct btrfs_transaction *t;
        LIST_HEAD(list);