]> git.proxmox.com Git - mirror_ubuntu-eoan-kernel.git/blobdiff - fs/btrfs/dev-replace.c
Btrfs: fix lockdep deadlock warning due to dev_replace
[mirror_ubuntu-eoan-kernel.git] / fs / btrfs / dev-replace.c
index cbb7dbfb3fffffc54f621b8dc43a7b1ec9a1185b..8c8b48971bc77d56d59a8b7be0f4a872f1ff9c4f 100644 (file)
@@ -202,13 +202,13 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
        struct btrfs_dev_replace_item *ptr;
        struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
 
-       btrfs_dev_replace_lock(dev_replace);
+       btrfs_dev_replace_lock(dev_replace, 0);
        if (!dev_replace->is_valid ||
            !dev_replace->item_needs_writeback) {
-               btrfs_dev_replace_unlock(dev_replace);
+               btrfs_dev_replace_unlock(dev_replace, 0);
                return 0;
        }
-       btrfs_dev_replace_unlock(dev_replace);
+       btrfs_dev_replace_unlock(dev_replace, 0);
 
        key.objectid = 0;
        key.type = BTRFS_DEV_REPLACE_KEY;
@@ -264,7 +264,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
        ptr = btrfs_item_ptr(eb, path->slots[0],
                             struct btrfs_dev_replace_item);
 
-       btrfs_dev_replace_lock(dev_replace);
+       btrfs_dev_replace_lock(dev_replace, 1);
        if (dev_replace->srcdev)
                btrfs_set_dev_replace_src_devid(eb, ptr,
                        dev_replace->srcdev->devid);
@@ -287,7 +287,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
        btrfs_set_dev_replace_cursor_right(eb, ptr,
                dev_replace->cursor_right);
        dev_replace->item_needs_writeback = 0;
-       btrfs_dev_replace_unlock(dev_replace);
+       btrfs_dev_replace_unlock(dev_replace, 1);
 
        btrfs_mark_buffer_dirty(eb);
 
@@ -356,7 +356,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
                return PTR_ERR(trans);
        }
 
-       btrfs_dev_replace_lock(dev_replace);
+       btrfs_dev_replace_lock(dev_replace, 1);
        switch (dev_replace->replace_state) {
        case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
        case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
@@ -395,7 +395,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
        dev_replace->is_valid = 1;
        dev_replace->item_needs_writeback = 1;
        args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
-       btrfs_dev_replace_unlock(dev_replace);
+       btrfs_dev_replace_unlock(dev_replace, 1);
 
        ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device);
        if (ret)
@@ -407,7 +407,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
-               btrfs_dev_replace_lock(dev_replace);
+               btrfs_dev_replace_lock(dev_replace, 1);
                goto leave;
        }
 
@@ -433,7 +433,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
 leave:
        dev_replace->srcdev = NULL;
        dev_replace->tgtdev = NULL;
-       btrfs_dev_replace_unlock(dev_replace);
+       btrfs_dev_replace_unlock(dev_replace, 1);
        btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
        return ret;
 }
@@ -471,18 +471,18 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
        /* don't allow cancel or unmount to disturb the finishing procedure */
        mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
 
-       btrfs_dev_replace_lock(dev_replace);
+       btrfs_dev_replace_lock(dev_replace, 0);
        /* was the operation canceled, or is it finished? */
        if (dev_replace->replace_state !=
            BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) {
-               btrfs_dev_replace_unlock(dev_replace);
+               btrfs_dev_replace_unlock(dev_replace, 0);
                mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
                return 0;
        }
 
        tgt_device = dev_replace->tgtdev;
        src_device = dev_replace->srcdev;
-       btrfs_dev_replace_unlock(dev_replace);
+       btrfs_dev_replace_unlock(dev_replace, 0);
 
        /*
         * flush all outstanding I/O and inode extent mappings before the
@@ -507,7 +507,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
        /* keep away write_all_supers() during the finishing procedure */
        mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
        mutex_lock(&root->fs_info->chunk_mutex);
-       btrfs_dev_replace_lock(dev_replace);
+       btrfs_dev_replace_lock(dev_replace, 1);
        dev_replace->replace_state =
                scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED
                          : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
@@ -528,7 +528,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
                                rcu_str_deref(src_device->name),
                              src_device->devid,
                              rcu_str_deref(tgt_device->name), scrub_ret);
-               btrfs_dev_replace_unlock(dev_replace);
+               btrfs_dev_replace_unlock(dev_replace, 1);
                mutex_unlock(&root->fs_info->chunk_mutex);
                mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
                mutex_unlock(&uuid_mutex);
@@ -565,7 +565,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
        list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
        fs_info->fs_devices->rw_devices++;
 
-       btrfs_dev_replace_unlock(dev_replace);
+       btrfs_dev_replace_unlock(dev_replace, 1);
 
        btrfs_rm_dev_replace_blocked(fs_info);
 
@@ -649,7 +649,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
        struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
        struct btrfs_device *srcdev;
 
-       btrfs_dev_replace_lock(dev_replace);
+       btrfs_dev_replace_lock(dev_replace, 0);
        /* even if !dev_replace_is_valid, the values are good enough for
         * the replace_status ioctl */
        args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
@@ -675,7 +675,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
                        div_u64(btrfs_device_get_total_bytes(srcdev), 1000));
                break;
        }
-       btrfs_dev_replace_unlock(dev_replace);
+       btrfs_dev_replace_unlock(dev_replace, 0);
 }
 
 int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
@@ -698,13 +698,13 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
                return -EROFS;
 
        mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
-       btrfs_dev_replace_lock(dev_replace);
+       btrfs_dev_replace_lock(dev_replace, 1);
        switch (dev_replace->replace_state) {
        case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
        case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
        case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
                result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED;
-               btrfs_dev_replace_unlock(dev_replace);
+               btrfs_dev_replace_unlock(dev_replace, 1);
                goto leave;
        case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
        case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
@@ -717,7 +717,7 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
        dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
        dev_replace->time_stopped = get_seconds();
        dev_replace->item_needs_writeback = 1;
-       btrfs_dev_replace_unlock(dev_replace);
+       btrfs_dev_replace_unlock(dev_replace, 1);
        btrfs_scrub_cancel(fs_info);
 
        trans = btrfs_start_transaction(root, 0);
@@ -740,7 +740,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
        struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
 
        mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
-       btrfs_dev_replace_lock(dev_replace);
+       btrfs_dev_replace_lock(dev_replace, 1);
        switch (dev_replace->replace_state) {
        case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
        case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
@@ -756,7 +756,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
                break;
        }
 
-       btrfs_dev_replace_unlock(dev_replace);
+       btrfs_dev_replace_unlock(dev_replace, 1);
        mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
 }
 
@@ -766,12 +766,12 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
        struct task_struct *task;
        struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
 
-       btrfs_dev_replace_lock(dev_replace);
+       btrfs_dev_replace_lock(dev_replace, 1);
        switch (dev_replace->replace_state) {
        case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
        case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
        case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
-               btrfs_dev_replace_unlock(dev_replace);
+               btrfs_dev_replace_unlock(dev_replace, 1);
                return 0;
        case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
                break;
@@ -784,10 +784,10 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
                btrfs_info(fs_info, "cannot continue dev_replace, tgtdev is missing");
                btrfs_info(fs_info,
                        "you may cancel the operation after 'mount -o degraded'");
-               btrfs_dev_replace_unlock(dev_replace);
+               btrfs_dev_replace_unlock(dev_replace, 1);
                return 0;
        }
-       btrfs_dev_replace_unlock(dev_replace);
+       btrfs_dev_replace_unlock(dev_replace, 1);
 
        WARN_ON(atomic_xchg(
                &fs_info->mutually_exclusive_operation_running, 1));
@@ -865,48 +865,58 @@ int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
        return 1;
 }
 
-void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace)
+void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace, int rw)
 {
-       /* the beginning is just an optimization for the typical case */
-       if (atomic_read(&dev_replace->nesting_level) == 0) {
-acquire_lock:
-               /* this is not a nested case where the same thread
-                * is trying to acqurire the same lock twice */
-               mutex_lock(&dev_replace->lock);
-               mutex_lock(&dev_replace->lock_management_lock);
-               dev_replace->lock_owner = current->pid;
-               atomic_inc(&dev_replace->nesting_level);
-               mutex_unlock(&dev_replace->lock_management_lock);
-               return;
+       if (rw == 1) {
+               /* write */
+again:
+               wait_event(dev_replace->read_lock_wq,
+                          atomic_read(&dev_replace->blocking_readers) == 0);
+               write_lock(&dev_replace->lock);
+               if (atomic_read(&dev_replace->blocking_readers)) {
+                       write_unlock(&dev_replace->lock);
+                       goto again;
+               }
+       } else {
+               read_lock(&dev_replace->lock);
+               atomic_inc(&dev_replace->read_locks);
        }
+}
 
-       mutex_lock(&dev_replace->lock_management_lock);
-       if (atomic_read(&dev_replace->nesting_level) > 0 &&
-           dev_replace->lock_owner == current->pid) {
-               WARN_ON(!mutex_is_locked(&dev_replace->lock));
-               atomic_inc(&dev_replace->nesting_level);
-               mutex_unlock(&dev_replace->lock_management_lock);
-               return;
+void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace, int rw)
+{
+       if (rw == 1) {
+               /* write */
+               ASSERT(atomic_read(&dev_replace->blocking_readers) == 0);
+               write_unlock(&dev_replace->lock);
+       } else {
+               ASSERT(atomic_read(&dev_replace->read_locks) > 0);
+               atomic_dec(&dev_replace->read_locks);
+               read_unlock(&dev_replace->lock);
        }
+}
 
-       mutex_unlock(&dev_replace->lock_management_lock);
-       goto acquire_lock;
+/* inc blocking cnt and release read lock */
+void btrfs_dev_replace_set_lock_blocking(
+                                       struct btrfs_dev_replace *dev_replace)
+{
+       /* only set blocking for read lock */
+       ASSERT(atomic_read(&dev_replace->read_locks) > 0);
+       atomic_inc(&dev_replace->blocking_readers);
+       read_unlock(&dev_replace->lock);
 }
 
-void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace)
+/* acquire read lock and dec blocking cnt */
+void btrfs_dev_replace_clear_lock_blocking(
+                                       struct btrfs_dev_replace *dev_replace)
 {
-       WARN_ON(!mutex_is_locked(&dev_replace->lock));
-       mutex_lock(&dev_replace->lock_management_lock);
-       WARN_ON(atomic_read(&dev_replace->nesting_level) < 1);
-       WARN_ON(dev_replace->lock_owner != current->pid);
-       atomic_dec(&dev_replace->nesting_level);
-       if (atomic_read(&dev_replace->nesting_level) == 0) {
-               dev_replace->lock_owner = 0;
-               mutex_unlock(&dev_replace->lock_management_lock);
-               mutex_unlock(&dev_replace->lock);
-       } else {
-               mutex_unlock(&dev_replace->lock_management_lock);
-       }
+       /* only set blocking for read lock */
+       ASSERT(atomic_read(&dev_replace->read_locks) > 0);
+       ASSERT(atomic_read(&dev_replace->blocking_readers) > 0);
+       read_lock(&dev_replace->lock);
+       if (atomic_dec_and_test(&dev_replace->blocking_readers) &&
+           waitqueue_active(&dev_replace->read_lock_wq))
+               wake_up(&dev_replace->read_lock_wq);
 }
 
 void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)