btrfs: do not start relocation until in progress drops are done

author Josef Bacik <josef@toxicpanda.com>
Fri, 18 Feb 2022 19:56:10 +0000 (14:56 -0500)
committer Paolo Pisati <paolo.pisati@canonical.com>
Wed, 9 Mar 2022 14:17:59 +0000 (15:17 +0100)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h

index ae06ad5593535be524ef9ccf20156f7192c9da41..b46409801647b414cdde7303168a7b9c6b030662 100644 (file)
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -593,6 +593,9 @@ enum {
         /* Indicate whether there are any tree modification log users */
         BTRFS_FS_TREE_MOD_LOG_USERS,
  
+       /* Indicate we have half completed snapshot deletions pending. */
+       BTRFS_FS_UNFINISHED_DROPS,
+
  #if BITS_PER_LONG == 32
         /* Indicate if we have error/warn message printed on 32bit systems */
         BTRFS_FS_32BIT_ERROR,
@@ -1098,8 +1101,15 @@ enum {
         BTRFS_ROOT_HAS_LOG_TREE,
         /* Qgroup flushing is in progress */
         BTRFS_ROOT_QGROUP_FLUSHING,
+       /* This root has a drop operation that was started previously. */
+       BTRFS_ROOT_UNFINISHED_DROP,
  };
  
+static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
+{
+       clear_and_wake_up_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags);
+}
+
  /*
   * Record swapped tree blocks of a subvolume tree for delayed subtree trace
   * code. For detail check comment in fs/btrfs/qgroup.c.
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c

index 2c3e106a02704c4a3efc38ec25be02c8c48690de..2180fcef56cabb646aa092dc9f1474f7eb3b2838 100644 (file)
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3659,6 +3659,10 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
  
         set_bit(BTRFS_FS_OPEN, &fs_info->flags);
  
+       /* Kick the cleaner thread so it'll start deleting snapshots. */
+       if (test_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags))
+               wake_up_process(fs_info->cleaner_kthread);
+
  clear_oneshot:
         btrfs_clear_oneshot_options(fs_info);
         return 0;
@@ -4340,6 +4344,12 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
          */
         kthread_park(fs_info->cleaner_kthread);
  
+       /*
+        * If we had UNFINISHED_DROPS we could still be processing them, so
+        * clear that bit and wake up relocation so it can stop.
+        */
+       btrfs_wake_unfinished_drop(fs_info);
+
         /* wait for the qgroup rescan worker to stop */
         btrfs_qgroup_wait_for_completion(fs_info, false);
  
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c

index 87c23c5c0f26da21f6d0cc04c9c5fd35ce479a21..514adc83577f0f626a3b1e03a43fd2438d08cd59 100644 (file)
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5541,6 +5541,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
         int ret;
         int level;
         bool root_dropped = false;
+       bool unfinished_drop = false;
  
         btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);
  
@@ -5583,6 +5584,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
          * already dropped.
          */
         set_bit(BTRFS_ROOT_DELETING, &root->state);
+       unfinished_drop = test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state);
+
         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
                 level = btrfs_header_level(root->node);
                 path->nodes[level] = btrfs_lock_root_node(root);
@@ -5757,6 +5760,13 @@ out_free:
         kfree(wc);
         btrfs_free_path(path);
  out:
+       /*
+        * We were an unfinished drop root, check to see if there are any
+        * pending, and if not clear and wake up any waiters.
+        */
+       if (!err && unfinished_drop)
+               btrfs_maybe_wake_unfinished_drop(fs_info);
+
         /*
          * So if we need to stop dropping the snapshot for whatever reason we
          * need to make sure to add it back to the dead root list so that we
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c

index d81bee621d3738f6015338cff7231bb49adc6a38..a050f9748fa7fc8908ba4138f803d61138d74fa5 100644 (file)
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3967,6 +3967,19 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
         int rw = 0;
         int err = 0;
  
+       /*
+        * This only gets set if we had a half-deleted snapshot on mount.  We
+        * cannot allow relocation to start while we're still trying to clean up
+        * these pending deletions.
+        */
+       ret = wait_on_bit(&fs_info->flags, BTRFS_FS_UNFINISHED_DROPS, TASK_INTERRUPTIBLE);
+       if (ret)
+               return ret;
+
+       /* We may have been woken up by close_ctree, so bail if we're closing. */
+       if (btrfs_fs_closing(fs_info))
+               return -EINTR;
+
         bg = btrfs_lookup_block_group(fs_info, group_start);
         if (!bg)
                 return -ENOENT;
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c

index db37a37996497828fa6124df02547fc6e03a9a34..1fa0e5e2e3505e85f7fc2da0a65a520c553e6f47 100644 (file)
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -280,6 +280,21 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
  
                 WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state));
                 if (btrfs_root_refs(&root->root_item) == 0) {
+                       struct btrfs_key drop_key;
+
+                       btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
+                       /*
+                        * If we have a non-zero drop_progress then we know we
+                        * made it partly through deleting this snapshot, and
+                        * thus we need to make sure we block any balance from
+                        * happening until this snapshot is completely dropped.
+                        */
+                       if (drop_key.objectid != 0 || drop_key.type != 0 ||
+                           drop_key.offset != 0) {
+                               set_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags);
+                               set_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state);
+                       }
+
                         set_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
                         btrfs_add_dead_root(root);
                 }
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c

index 3c5b1f72129acf05bb7c3bb0f60aef0f4794cc02..9a6009108ea5592c8da9ce4bd7b4d3bf141282d9 100644 (file)
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1340,6 +1340,32 @@ again:
         return 0;
  }
  
+/*
+ * Called by btrfs_drop_snapshot() after an unfinished drop ends without error.
+ * Clears BTRFS_FS_UNFINISHED_DROPS and wakes waiters if no such drop remains.
+ */
+void btrfs_maybe_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
+{
+       /*
+        * btrfs_add_dead_root() queues unfinished drops at the head of
+        * dead_roots, so only the first entry needs checking.  trans_lock
+        * protects the list; drop it before waking anyone up.
+        */
+       spin_lock(&fs_info->trans_lock);
+       if (!list_empty(&fs_info->dead_roots)) {
+               struct btrfs_root *root = list_first_entry(&fs_info->dead_roots,
+                                                          struct btrfs_root,
+                                                          root_list);
+               if (test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state)) {
+                       spin_unlock(&fs_info->trans_lock);
+                       return;
+               }
+       }
+       spin_unlock(&fs_info->trans_lock);
+
+       btrfs_wake_unfinished_drop(fs_info);
+}
+
  /*
   * dead roots are old snapshots that need to be deleted.  This allocates
   * a dirty root struct and adds it into the list of dead roots that need to
@@ -1352,7 +1378,12 @@ void btrfs_add_dead_root(struct btrfs_root *root)
         spin_lock(&fs_info->trans_lock);
         if (list_empty(&root->root_list)) {
                 btrfs_grab_root(root);
-               list_add_tail(&root->root_list, &fs_info->dead_roots);
+
+               /* We want to process the partially complete drops first. */
+               if (test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state))
+                       list_add(&root->root_list, &fs_info->dead_roots);
+               else
+                       list_add_tail(&root->root_list, &fs_info->dead_roots);
         }
         spin_unlock(&fs_info->trans_lock);
  }
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h

index eba07b8119bbd728369c272da74743382b3036ce..0ded32bbd001ec3a67a599183671b8a394bfda0b 100644 (file)
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -217,6 +217,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid);
  
  void btrfs_add_dead_root(struct btrfs_root *root);
  int btrfs_defrag_root(struct btrfs_root *root);
+void btrfs_maybe_wake_unfinished_drop(struct btrfs_fs_info *fs_info);
  int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
  int btrfs_commit_transaction(struct btrfs_trans_handle *trans);
  int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans);
author	Josef Bacik <josef@toxicpanda.com>
	Fri, 18 Feb 2022 19:56:10 +0000 (14:56 -0500)
committer	Paolo Pisati <paolo.pisati@canonical.com>
	Wed, 9 Mar 2022 14:17:59 +0000 (15:17 +0100)
fs/btrfs/ctree.h		patch | blob | blame | history
fs/btrfs/disk-io.c		patch | blob | blame | history
fs/btrfs/extent-tree.c		patch | blob | blame | history
fs/btrfs/relocation.c		patch | blob | blame | history
fs/btrfs/root-tree.c		patch | blob | blame | history
fs/btrfs/transaction.c		patch | blob | blame | history
fs/btrfs/transaction.h		patch | blob | blame | history