git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blobdiff - fs/btrfs/volumes.c
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...
[mirror_ubuntu-bionic-kernel.git] / fs / btrfs / volumes.c
index 8bffb9174afba04d8375b96f754256b68ff9b4ef..78b871753cb61e099abdfca27a0e316c37c329ee 100644 (file)
@@ -982,6 +982,35 @@ out:
        return ret;
 }
 
+/*
+ * Report whether the range [*start, *start + len) on @device overlaps a
+ * stripe of any chunk queued on this transaction's pending_chunks list.
+ *
+ * @trans:  handle whose transaction->pending_chunks list is walked
+ * @device: only stripes whose ->dev equals this device are considered
+ * @start:  in/out; each time an overlapping stripe is found, *start is moved
+ *          to the end of that stripe (physical + em->orig_block_len), and the
+ *          remaining stripes are compared against the moved value
+ * @len:    length of the range being tested
+ *
+ * Returns 1 if at least one overlapping stripe was found, 0 otherwise.
+ */
+static int contains_pending_extent(struct btrfs_trans_handle *trans,
+                                  struct btrfs_device *device,
+                                  u64 *start, u64 len)
+{
+       struct extent_map *cur;
+       int found = 0;
+
+       list_for_each_entry(cur, &trans->transaction->pending_chunks, list) {
+               struct map_lookup *map = (struct map_lookup *)cur->bdev;
+               int idx;
+
+               for (idx = 0; idx < map->num_stripes; idx++) {
+                       u64 stripe_start;
+                       u64 stripe_end;
+
+                       if (map->stripes[idx].dev != device)
+                               continue;
+
+                       stripe_start = map->stripes[idx].physical;
+                       stripe_end = stripe_start + cur->orig_block_len;
+
+                       /* Stripe lies entirely after the range: no overlap. */
+                       if (stripe_start >= *start + len)
+                               continue;
+                       /* Stripe lies entirely before the range: no overlap. */
+                       if (stripe_end <= *start)
+                               continue;
+
+                       *start = stripe_end;
+                       found = 1;
+               }
+       }
+
+       return found;
+}
+
+
 /*
  * find_free_dev_extent - find free space in the specified device
  * @device:    the device which we search the free space in
@@ -1002,7 +1031,8 @@ out:
  * But if we don't find suitable free space, it is used to store the size of
  * the max free space.
  */
-int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
+int find_free_dev_extent(struct btrfs_trans_handle *trans,
+                        struct btrfs_device *device, u64 num_bytes,
                         u64 *start, u64 *len)
 {
        struct btrfs_key key;
@@ -1026,21 +1056,22 @@ int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
         */
        search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
 
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+again:
        max_hole_start = search_start;
        max_hole_size = 0;
        hole_size = 0;
 
        if (search_start >= search_end || device->is_tgtdev_for_dev_replace) {
                ret = -ENOSPC;
-               goto error;
+               goto out;
        }
 
-       path = btrfs_alloc_path();
-       if (!path) {
-               ret = -ENOMEM;
-               goto error;
-       }
        path->reada = 2;
+       path->search_commit_root = 1;
+       path->skip_locking = 1;
 
        key.objectid = device->devid;
        key.offset = search_start;
@@ -1081,6 +1112,15 @@ int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
                if (key.offset > search_start) {
                        hole_size = key.offset - search_start;
 
+                       /*
+                        * Have to check before we set max_hole_start, otherwise
+                        * we could end up sending back this offset anyway.
+                        */
+                       if (contains_pending_extent(trans, device,
+                                                   &search_start,
+                                                   hole_size))
+                               hole_size = 0;
+
                        if (hole_size > max_hole_size) {
                                max_hole_start = search_start;
                                max_hole_size = hole_size;
@@ -1124,6 +1164,11 @@ next:
                max_hole_size = hole_size;
        }
 
+       if (contains_pending_extent(trans, device, &search_start, hole_size)) {
+               btrfs_release_path(path);
+               goto again;
+       }
+
        /* See above. */
        if (hole_size < num_bytes)
                ret = -ENOSPC;
@@ -1132,7 +1177,6 @@ next:
 
 out:
        btrfs_free_path(path);
-error:
        *start = max_hole_start;
        if (len)
                *len = max_hole_size;
@@ -1244,47 +1288,22 @@ out:
        return ret;
 }
 
-static noinline int find_next_chunk(struct btrfs_root *root,
-                                   u64 objectid, u64 *offset)
+/*
+ * Return the logical offset at which the next chunk can be placed.
+ *
+ * The value is start + len of the last node in fs_info's chunk mapping tree
+ * (presumably the mapping with the highest start offset; the tree's ordering
+ * is defined outside this hunk), or 0 when the tree has no nodes.
+ *
+ * Unlike the removed version, this reads only the in-memory mapping tree
+ * under its read lock: no path is allocated and no tree search is done, so
+ * there is no error return.
+ */
+static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
 {
-       struct btrfs_path *path;
-       int ret;
-       struct btrfs_key key;
-       struct btrfs_chunk *chunk;
-       struct btrfs_key found_key;
-
-       path = btrfs_alloc_path();
-       if (!path)
-               return -ENOMEM;
-
-       key.objectid = objectid;
-       key.offset = (u64)-1;
-       key.type = BTRFS_CHUNK_ITEM_KEY;
-
-       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-       if (ret < 0)
-               goto error;
-
-       BUG_ON(ret == 0); /* Corruption */
+       struct extent_map_tree *em_tree;
+       struct extent_map *em;
+       struct rb_node *n;
+       u64 ret = 0;
 
-       ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
-       if (ret) {
-               *offset = 0;
-       } else {
-               btrfs_item_key_to_cpu(path->nodes[0], &found_key,
-                                     path->slots[0]);
-               if (found_key.objectid != objectid)
-                       *offset = 0;
-               else {
-                       chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
-                                              struct btrfs_chunk);
-                       *offset = found_key.offset +
-                               btrfs_chunk_length(path->nodes[0], chunk);
-               }
+       em_tree = &fs_info->mapping_tree.map_tree;
+       read_lock(&em_tree->lock);
+       /* ret keeps its initial 0 when the tree has no nodes. */
+       n = rb_last(&em_tree->map);
+       if (n) {
+               em = rb_entry(n, struct extent_map, rb_node);
+               /* First logical byte past the end of that mapping. */
+               ret = em->start + em->len;
        }
-       ret = 0;
-error:
-       btrfs_free_path(path);
+       read_unlock(&em_tree->lock);
+
        return ret;
 }
 
@@ -1462,31 +1481,23 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
        btrfs_dev_replace_unlock(&root->fs_info->dev_replace);
 
        if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) {
-               printk(KERN_ERR "btrfs: unable to go below four devices "
-                      "on raid10\n");
-               ret = -EINVAL;
+               ret = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET;
                goto out;
        }
 
        if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2) {
-               printk(KERN_ERR "btrfs: unable to go below two "
-                      "devices on raid1\n");
-               ret = -EINVAL;
+               ret = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET;
                goto out;
        }
 
        if ((all_avail & BTRFS_BLOCK_GROUP_RAID5) &&
            root->fs_info->fs_devices->rw_devices <= 2) {
-               printk(KERN_ERR "btrfs: unable to go below two "
-                      "devices on raid5\n");
-               ret = -EINVAL;
+               ret = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET;
                goto out;
        }
        if ((all_avail & BTRFS_BLOCK_GROUP_RAID6) &&
            root->fs_info->fs_devices->rw_devices <= 3) {
-               printk(KERN_ERR "btrfs: unable to go below three "
-                      "devices on raid6\n");
-               ret = -EINVAL;
+               ret = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET;
                goto out;
        }
 
@@ -1512,8 +1523,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
                bh = NULL;
                disk_super = NULL;
                if (!device) {
-                       printk(KERN_ERR "btrfs: no missing devices found to "
-                              "remove\n");
+                       ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
                        goto out;
                }
        } else {
@@ -1535,15 +1545,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
        }
 
        if (device->is_tgtdev_for_dev_replace) {
-               pr_err("btrfs: unable to remove the dev_replace target dev\n");
-               ret = -EINVAL;
+               ret = BTRFS_ERROR_DEV_TGT_REPLACE;
                goto error_brelse;
        }
 
        if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) {
-               printk(KERN_ERR "btrfs: unable to remove the only writeable "
-                      "device\n");
-               ret = -EINVAL;
+               ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
                goto error_brelse;
        }
 
@@ -3295,10 +3302,7 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
        }
 
        tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
-       if (IS_ERR(tsk))
-               return PTR_ERR(tsk);
-
-       return 0;
+       return PTR_RET(tsk);
 }
 
 int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
@@ -3681,10 +3685,8 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
 }
 
 static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
-                              struct btrfs_root *extent_root,
-                              struct map_lookup **map_ret,
-                              u64 *num_bytes_out, u64 *stripe_size_out,
-                              u64 start, u64 type)
+                              struct btrfs_root *extent_root, u64 start,
+                              u64 type)
 {
        struct btrfs_fs_info *info = extent_root->fs_info;
        struct btrfs_fs_devices *fs_devices = info->fs_devices;
@@ -3791,7 +3793,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                if (total_avail == 0)
                        continue;
 
-               ret = find_free_dev_extent(device,
+               ret = find_free_dev_extent(trans, device,
                                           max_stripe_size * dev_stripes,
                                           &dev_offset, &max_avail);
                if (ret && ret != -ENOSPC)
@@ -3903,12 +3905,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        map->type = type;
        map->sub_stripes = sub_stripes;
 
-       *map_ret = map;
        num_bytes = stripe_size * data_stripes;
 
-       *stripe_size_out = stripe_size;
-       *num_bytes_out = num_bytes;
-
        trace_btrfs_chunk_alloc(info->chunk_root, map, start, num_bytes);
 
        em = alloc_extent_map();
@@ -3921,38 +3919,26 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        em->len = num_bytes;
        em->block_start = 0;
        em->block_len = em->len;
+       em->orig_block_len = stripe_size;
 
        em_tree = &extent_root->fs_info->mapping_tree.map_tree;
        write_lock(&em_tree->lock);
        ret = add_extent_mapping(em_tree, em, 0);
+       if (!ret) {
+               list_add_tail(&em->list, &trans->transaction->pending_chunks);
+               atomic_inc(&em->refs);
+       }
        write_unlock(&em_tree->lock);
        if (ret) {
                free_extent_map(em);
                goto error;
        }
 
-       for (i = 0; i < map->num_stripes; ++i) {
-               struct btrfs_device *device;
-               u64 dev_offset;
-
-               device = map->stripes[i].dev;
-               dev_offset = map->stripes[i].physical;
-
-               ret = btrfs_alloc_dev_extent(trans, device,
-                               info->chunk_root->root_key.objectid,
-                               BTRFS_FIRST_CHUNK_TREE_OBJECTID,
-                               start, dev_offset, stripe_size);
-               if (ret)
-                       goto error_dev_extent;
-       }
-
        ret = btrfs_make_block_group(trans, extent_root, 0, type,
                                     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
                                     start, num_bytes);
-       if (ret) {
-               i = map->num_stripes - 1;
-               goto error_dev_extent;
-       }
+       if (ret)
+               goto error_del_extent;
 
        free_extent_map(em);
        check_raid56_incompat_flag(extent_root->fs_info, type);
@@ -3960,18 +3946,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        kfree(devices_info);
        return 0;
 
-error_dev_extent:
-       for (; i >= 0; i--) {
-               struct btrfs_device *device;
-               int err;
-
-               device = map->stripes[i].dev;
-               err = btrfs_free_dev_extent(trans, device, start);
-               if (err) {
-                       btrfs_abort_transaction(trans, extent_root, err);
-                       break;
-               }
-       }
+error_del_extent:
        write_lock(&em_tree->lock);
        remove_extent_mapping(em_tree, em);
        write_unlock(&em_tree->lock);
@@ -3986,33 +3961,68 @@ error:
        return ret;
 }
 
-static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
+int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
                                struct btrfs_root *extent_root,
-                               struct map_lookup *map, u64 chunk_offset,
-                               u64 chunk_size, u64 stripe_size)
+                               u64 chunk_offset, u64 chunk_size)
 {
-       u64 dev_offset;
        struct btrfs_key key;
        struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
        struct btrfs_device *device;
        struct btrfs_chunk *chunk;
        struct btrfs_stripe *stripe;
-       size_t item_size = btrfs_chunk_item_size(map->num_stripes);
-       int index = 0;
+       struct extent_map_tree *em_tree;
+       struct extent_map *em;
+       struct map_lookup *map;
+       size_t item_size;
+       u64 dev_offset;
+       u64 stripe_size;
+       int i = 0;
        int ret;
 
+       em_tree = &extent_root->fs_info->mapping_tree.map_tree;
+       read_lock(&em_tree->lock);
+       em = lookup_extent_mapping(em_tree, chunk_offset, chunk_size);
+       read_unlock(&em_tree->lock);
+
+       if (!em) {
+               btrfs_crit(extent_root->fs_info, "unable to find logical "
+                          "%Lu len %Lu", chunk_offset, chunk_size);
+               return -EINVAL;
+       }
+
+       if (em->start != chunk_offset || em->len != chunk_size) {
+               btrfs_crit(extent_root->fs_info, "found a bad mapping, wanted"
+                         " %Lu-%Lu, found %Lu-%Lu\n", chunk_offset,
+                         chunk_size, em->start, em->len);
+               free_extent_map(em);
+               return -EINVAL;
+       }
+
+       map = (struct map_lookup *)em->bdev;
+       item_size = btrfs_chunk_item_size(map->num_stripes);
+       stripe_size = em->orig_block_len;
+
        chunk = kzalloc(item_size, GFP_NOFS);
-       if (!chunk)
-               return -ENOMEM;
+       if (!chunk) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       for (i = 0; i < map->num_stripes; i++) {
+               device = map->stripes[i].dev;
+               dev_offset = map->stripes[i].physical;
 
-       index = 0;
-       while (index < map->num_stripes) {
-               device = map->stripes[index].dev;
                device->bytes_used += stripe_size;
                ret = btrfs_update_device(trans, device);
                if (ret)
-                       goto out_free;
-               index++;
+                       goto out;
+               ret = btrfs_alloc_dev_extent(trans, device,
+                                            chunk_root->root_key.objectid,
+                                            BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+                                            chunk_offset, dev_offset,
+                                            stripe_size);
+               if (ret)
+                       goto out;
        }
 
        spin_lock(&extent_root->fs_info->free_chunk_lock);
@@ -4020,17 +4030,15 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
                                                   map->num_stripes);
        spin_unlock(&extent_root->fs_info->free_chunk_lock);
 
-       index = 0;
        stripe = &chunk->stripe;
-       while (index < map->num_stripes) {
-               device = map->stripes[index].dev;
-               dev_offset = map->stripes[index].physical;
+       for (i = 0; i < map->num_stripes; i++) {
+               device = map->stripes[i].dev;
+               dev_offset = map->stripes[i].physical;
 
                btrfs_set_stack_stripe_devid(stripe, device->devid);
                btrfs_set_stack_stripe_offset(stripe, dev_offset);
                memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
                stripe++;
-               index++;
        }
 
        btrfs_set_stack_chunk_length(chunk, chunk_size);
@@ -4048,7 +4056,6 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
        key.offset = chunk_offset;
 
        ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
-
        if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
                /*
                 * TODO: Cleanup of inserted chunk root in case of
@@ -4058,8 +4065,9 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
                                             item_size);
        }
 
-out_free:
+out:
        kfree(chunk);
+       free_extent_map(em);
        return ret;
 }
 
@@ -4074,27 +4082,9 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                      struct btrfs_root *extent_root, u64 type)
 {
        u64 chunk_offset;
-       u64 chunk_size;
-       u64 stripe_size;
-       struct map_lookup *map;
-       struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
-       int ret;
-
-       ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
-                             &chunk_offset);
-       if (ret)
-               return ret;
 
-       ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
-                                 &stripe_size, chunk_offset, type);
-       if (ret)
-               return ret;
-
-       ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
-                                  chunk_size, stripe_size);
-       if (ret)
-               return ret;
-       return 0;
+       chunk_offset = find_next_chunk(extent_root->fs_info);
+       return __btrfs_alloc_chunk(trans, extent_root, chunk_offset, type);
 }
 
 static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
@@ -4103,66 +4093,31 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
 {
        u64 chunk_offset;
        u64 sys_chunk_offset;
-       u64 chunk_size;
-       u64 sys_chunk_size;
-       u64 stripe_size;
-       u64 sys_stripe_size;
        u64 alloc_profile;
-       struct map_lookup *map;
-       struct map_lookup *sys_map;
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_root *extent_root = fs_info->extent_root;
        int ret;
 
-       ret = find_next_chunk(fs_info->chunk_root,
-                             BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset);
-       if (ret)
-               return ret;
-
+       chunk_offset = find_next_chunk(fs_info);
        alloc_profile = btrfs_get_alloc_profile(extent_root, 0);
-       ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
-                                 &stripe_size, chunk_offset, alloc_profile);
+       ret = __btrfs_alloc_chunk(trans, extent_root, chunk_offset,
+                                 alloc_profile);
        if (ret)
                return ret;
 
-       sys_chunk_offset = chunk_offset + chunk_size;
-
+       sys_chunk_offset = find_next_chunk(root->fs_info);
        alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0);
-       ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map,
-                                 &sys_chunk_size, &sys_stripe_size,
-                                 sys_chunk_offset, alloc_profile);
+       ret = __btrfs_alloc_chunk(trans, extent_root, sys_chunk_offset,
+                                 alloc_profile);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                goto out;
        }
 
        ret = btrfs_add_device(trans, fs_info->chunk_root, device);
-       if (ret) {
-               btrfs_abort_transaction(trans, root, ret);
-               goto out;
-       }
-
-       /*
-        * Modifying chunk tree needs allocating new blocks from both
-        * system block group and metadata block group. So we only can
-        * do operations require modifying the chunk tree after both
-        * block groups were created.
-        */
-       ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
-                                  chunk_size, stripe_size);
-       if (ret) {
-               btrfs_abort_transaction(trans, root, ret);
-               goto out;
-       }
-
-       ret = __finish_chunk_alloc(trans, extent_root, sys_map,
-                                  sys_chunk_offset, sys_chunk_size,
-                                  sys_stripe_size);
        if (ret)
                btrfs_abort_transaction(trans, root, ret);
-
 out:
-
        return ret;
 }
 
@@ -4435,9 +4390,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
        map = (struct map_lookup *)em->bdev;
        offset = logical - em->start;
 
-       if (mirror_num > map->num_stripes)
-               mirror_num = 0;
-
        stripe_len = map->stripe_len;
        stripe_nr = offset;
        /*
@@ -5367,7 +5319,6 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
                return NULL;
        list_add(&device->dev_list,
                 &fs_devices->devices);
-       device->dev_root = root->fs_info->dev_root;
        device->devid = devid;
        device->work.func = pending_bios_fn;
        device->fs_devices = fs_devices;
@@ -5593,7 +5544,6 @@ static int read_one_dev(struct btrfs_root *root,
        }
 
        fill_device_from_item(leaf, dev_item, device);
-       device->dev_root = root->fs_info->dev_root;
        device->in_fs_metadata = 1;
        if (device->writeable && !device->is_tgtdev_for_dev_replace) {
                device->fs_devices->total_rw_bytes += device->total_bytes;
@@ -5751,6 +5701,17 @@ error:
        return ret;
 }
 
+/*
+ * Set ->dev_root of every device on fs_info->fs_devices->devices to
+ * fs_info->dev_root.  The list is walked with device_list_mutex held.
+ */
+void btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_fs_devices *devs = fs_info->fs_devices;
+       struct list_head *pos;
+
+       mutex_lock(&devs->device_list_mutex);
+       list_for_each(pos, &devs->devices) {
+               struct btrfs_device *dev;
+
+               dev = list_entry(pos, struct btrfs_device, dev_list);
+               dev->dev_root = fs_info->dev_root;
+       }
+       mutex_unlock(&devs->device_list_mutex);
+}
+
 static void __btrfs_reset_dev_stats(struct btrfs_device *dev)
 {
        int i;