git.proxmox.com Git - mirror_ubuntu-kernels.git/commitdiff
bcachefs: Move journal reclaim to a kthread
author Kent Overstreet <kent.overstreet@gmail.com>
Fri, 20 Nov 2020 01:55:33 +0000 (20:55 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:48 +0000 (17:08 -0400)
This is to make tracing easier.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
13 files changed:
fs/bcachefs/alloc_background.c
fs/bcachefs/bcachefs.h
fs/bcachefs/btree_gc.c
fs/bcachefs/btree_key_cache.c
fs/bcachefs/chardev.c
fs/bcachefs/journal.c
fs/bcachefs/journal_io.c
fs/bcachefs/journal_reclaim.c
fs/bcachefs/journal_reclaim.h
fs/bcachefs/journal_types.h
fs/bcachefs/movinggc.c
fs/bcachefs/rebalance.c
fs/bcachefs/super.c

index 8f0c1f378b776514982f5ead7b4319e90f3bd90a..078968f30175b1e6292a3b32031b00050bda469e 100644 (file)
@@ -1409,7 +1409,7 @@ int bch2_dev_allocator_start(struct bch_dev *ca)
                return 0;
 
        p = kthread_create(bch2_allocator_thread, ca,
-                          "bch_alloc[%s]", ca->name);
+                          "bch-alloc/%s", ca->name);
        if (IS_ERR(p))
                return PTR_ERR(p);
 
index d77d1fc1cfedf6f6005dc56d9682c84c49850e1e..4fe3f9257752546c4927829c7691dc93572a3a8b 100644 (file)
@@ -650,7 +650,6 @@ struct bch_fs {
        struct workqueue_struct *wq;
        /* copygc needs its own workqueue for index updates.. */
        struct workqueue_struct *copygc_wq;
-       struct workqueue_struct *journal_reclaim_wq;
 
        /* ALLOCATION */
        struct delayed_work     pd_controllers_update;
index da0ad8f507754fcae12276b935a5e5348a8abe8e..df018a2e463eb28d727d553e783a6510f8fe845b 100644 (file)
@@ -1427,7 +1427,7 @@ int bch2_gc_thread_start(struct bch_fs *c)
 
        BUG_ON(c->gc_thread);
 
-       p = kthread_create(bch2_gc_thread, c, "bch_gc");
+       p = kthread_create(bch2_gc_thread, c, "bch-gc/%s", c->name);
        if (IS_ERR(p))
                return PTR_ERR(p);
 
index 99e03852b814e77fc88548eadd26d77d8d15ad7c..d1f226e66158d3114b4eaba460ab5a5b642dfcbd 100644 (file)
@@ -497,7 +497,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
                                &ck->journal, btree_key_cache_journal_flush);
 
        if (kick_reclaim)
-               mod_delayed_work(c->journal_reclaim_wq, &c->journal.reclaim_work, 0);
+               journal_reclaim_kick(&c->journal);
        return true;
 }
 
index cd5c850a41ecd90ec5cff30bda58ec06b043cce3..7c77fd09c83492ba6e4872cd07e4e2caed7d3099 100644 (file)
@@ -341,7 +341,8 @@ static long bch2_ioctl_data(struct bch_fs *c,
        ctx->c = c;
        ctx->arg = arg;
 
-       ctx->thread = kthread_create(bch2_data_thread, ctx, "[bcachefs]");
+       ctx->thread = kthread_create(bch2_data_thread, ctx,
+                                    "bch-data/%s", c->name);
        if (IS_ERR(ctx->thread)) {
                ret = PTR_ERR(ctx->thread);
                goto err;
index bb4353e673e7c31ef4c918bafce6ee2f12f50c1b..2c6aa36cc025a52f752b8a626dae54d9ab3035a0 100644 (file)
@@ -225,11 +225,14 @@ static bool journal_entry_close(struct journal *j)
  */
 static int journal_entry_open(struct journal *j)
 {
+       struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct journal_buf *buf = journal_cur_buf(j);
        union journal_res_state old, new;
        int u64s;
        u64 v;
 
+       BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
+
        lockdep_assert_held(&j->lock);
        BUG_ON(journal_entry_is_open(j));
 
@@ -480,8 +483,10 @@ static bool journal_preres_available(struct journal *j,
 {
        bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags);
 
-       if (!ret)
-               bch2_journal_reclaim_work(&j->reclaim_work.work);
+       if (!ret && mutex_trylock(&j->reclaim_lock)) {
+               bch2_journal_reclaim(j);
+               mutex_unlock(&j->reclaim_lock);
+       }
 
        return ret;
 }
@@ -888,7 +893,7 @@ void bch2_fs_journal_stop(struct journal *j)
                j->last_empty_seq + 1 != journal_cur_seq(j)));
 
        cancel_delayed_work_sync(&j->write_work);
-       cancel_delayed_work_sync(&j->reclaim_work);
+       bch2_journal_reclaim_stop(j);
 }
 
 int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
@@ -1019,7 +1024,6 @@ int bch2_fs_journal_init(struct journal *j)
        spin_lock_init(&j->err_lock);
        init_waitqueue_head(&j->wait);
        INIT_DELAYED_WORK(&j->write_work, journal_write_work);
-       INIT_DELAYED_WORK(&j->reclaim_work, bch2_journal_reclaim_work);
        init_waitqueue_head(&j->pin_flush_wait);
        mutex_init(&j->reclaim_lock);
        mutex_init(&j->discard_lock);
@@ -1071,6 +1075,8 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
               "last_seq:\t\t%llu\n"
               "last_seq_ondisk:\t%llu\n"
               "prereserved:\t\t%u/%u\n"
+              "nr direct reclaim:\t%llu\n"
+              "nr background reclaim:\t%llu\n"
               "current entry sectors:\t%u\n"
               "current entry error:\t%u\n"
               "current entry:\t\t",
@@ -1080,6 +1086,8 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
               j->last_seq_ondisk,
               j->prereserved.reserved,
               j->prereserved.remaining,
+              j->nr_direct_reclaim,
+              j->nr_background_reclaim,
               j->cur_entry_sectors,
               j->cur_entry_error);
 
index 354d57a3cd59503803c479ba4ffb876988c830f3..79d5d892728fdc12eae7989ac106b8a6808198bc 100644 (file)
@@ -993,7 +993,7 @@ static void journal_write_done(struct closure *cl)
         * Must come before signaling write completion, for
         * bch2_fs_journal_stop():
         */
-       mod_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, 0);
+       journal_reclaim_kick(&c->journal);
 
        /* also must come before signalling write completion: */
        closure_debug_destroy(cl);
@@ -1044,6 +1044,8 @@ void bch2_journal_write(struct closure *cl)
        unsigned i, sectors, bytes, u64s;
        int ret;
 
+       BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
+
        bch2_journal_pin_put(j, le64_to_cpu(w->data->seq));
 
        journal_buf_realloc(j, w);
index 9c67597d1ec64945db9f5ce63cb78a0f3acd1ada..9f0d2e6aa4e30e2fa067d97a81639d403f368b20 100644 (file)
@@ -9,6 +9,7 @@
 #include "super.h"
 #include "trace.h"
 
+#include <linux/kthread.h>
 #include <linux/sched/mm.h>
 
 /* Free space calculations: */
@@ -534,9 +535,10 @@ static u64 journal_seq_to_flush(struct journal *j)
  * 512 journal entries or 25% of all journal buckets, then
  * journal_next_bucket() should not stall.
  */
-void bch2_journal_reclaim(struct journal *j)
+static void __bch2_journal_reclaim(struct journal *j, bool direct)
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       bool kthread = (current->flags & PF_KTHREAD) != 0;
        u64 seq_to_flush, nr_flushed = 0;
        size_t min_nr;
        unsigned flags;
@@ -551,6 +553,9 @@ void bch2_journal_reclaim(struct journal *j)
        flags = memalloc_noreclaim_save();
 
        do {
+               if (kthread && kthread_should_stop())
+                       break;
+
                bch2_journal_do_discards(j);
 
                seq_to_flush = journal_seq_to_flush(j);
@@ -582,26 +587,83 @@ void bch2_journal_reclaim(struct journal *j)
                                c->btree_key_cache.nr_dirty,
                                c->btree_key_cache.nr_keys);
 
-               nr_flushed += journal_flush_pins(j, seq_to_flush, min_nr);
+               nr_flushed = journal_flush_pins(j, seq_to_flush, min_nr);
+
+               if (direct)
+                       j->nr_direct_reclaim += nr_flushed;
+               else
+                       j->nr_background_reclaim += nr_flushed;
+               trace_journal_reclaim_finish(c, nr_flushed);
        } while (min_nr);
 
        memalloc_noreclaim_restore(flags);
+}
+
+void bch2_journal_reclaim(struct journal *j)
+{
+       __bch2_journal_reclaim(j, true);
+}
+
+static int bch2_journal_reclaim_thread(void *arg)
+{
+       struct journal *j = arg;
+       unsigned long next;
+
+       while (!kthread_should_stop()) {
+               j->reclaim_kicked = false;
+
+               mutex_lock(&j->reclaim_lock);
+               __bch2_journal_reclaim(j, false);
+               mutex_unlock(&j->reclaim_lock);
+
+               next = j->last_flushed + msecs_to_jiffies(j->reclaim_delay_ms);
 
-       trace_journal_reclaim_finish(c, nr_flushed);
+               while (1) {
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       if (kthread_should_stop())
+                               break;
+                       if (j->reclaim_kicked)
+                               break;
+                       if (time_after_eq(jiffies, next))
+                               break;
+                       schedule_timeout(next - jiffies);
 
-       if (!bch2_journal_error(j))
-               queue_delayed_work(c->journal_reclaim_wq, &j->reclaim_work,
-                                  msecs_to_jiffies(j->reclaim_delay_ms));
+               }
+               __set_current_state(TASK_RUNNING);
+       }
+
+       return 0;
 }
 
-void bch2_journal_reclaim_work(struct work_struct *work)
+void bch2_journal_reclaim_stop(struct journal *j)
 {
-       struct journal *j = container_of(to_delayed_work(work),
-                               struct journal, reclaim_work);
+       struct task_struct *p = j->reclaim_thread;
 
-       mutex_lock(&j->reclaim_lock);
-       bch2_journal_reclaim(j);
-       mutex_unlock(&j->reclaim_lock);
+       j->reclaim_thread = NULL;
+
+       if (p) {
+               kthread_stop(p);
+               put_task_struct(p);
+       }
+}
+
+int bch2_journal_reclaim_start(struct journal *j)
+{
+       struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       struct task_struct *p;
+
+       if (j->reclaim_thread)
+               return 0;
+
+       p = kthread_create(bch2_journal_reclaim_thread, j,
+                          "bch-reclaim/%s", c->name);
+       if (IS_ERR(p))
+               return PTR_ERR(p);
+
+       get_task_struct(p);
+       j->reclaim_thread = p;
+       wake_up_process(p);
+       return 0;
 }
 
 static int journal_flush_done(struct journal *j, u64 seq_to_flush,
index 8128907a7623cb223718c55ed2e63c380b0ce796..bae2c9210db8612ffb0cd47e731d5143c2b7a1e0 100644 (file)
@@ -10,6 +10,17 @@ enum journal_space_from {
        journal_space_clean,
 };
 
+static inline void journal_reclaim_kick(struct journal *j)
+{
+       struct task_struct *p = READ_ONCE(j->reclaim_thread);
+
+       if (p && !j->reclaim_kicked) {
+               j->reclaim_kicked = true;
+               if (p)
+                       wake_up_process(p);
+       }
+}
+
 unsigned bch2_journal_dev_buckets_available(struct journal *,
                                            struct journal_device *,
                                            enum journal_space_from);
@@ -55,7 +66,9 @@ void bch2_journal_pin_flush(struct journal *, struct journal_entry_pin *);
 
 void bch2_journal_do_discards(struct journal *);
 void bch2_journal_reclaim(struct journal *);
-void bch2_journal_reclaim_work(struct work_struct *);
+
+void bch2_journal_reclaim_stop(struct journal *);
+int bch2_journal_reclaim_start(struct journal *);
 
 bool bch2_journal_flush_pins(struct journal *, u64);
 
index 5f20653b8eb5ab532d8a65cfb032f00bb96f5cbc..6312a7f06d8798031ecd06af21e19d324606157a 100644 (file)
@@ -216,8 +216,12 @@ struct journal {
        struct write_point      wp;
        spinlock_t              err_lock;
 
-       struct delayed_work     reclaim_work;
        struct mutex            reclaim_lock;
+       struct task_struct      *reclaim_thread;
+       bool                    reclaim_kicked;
+       u64                     nr_direct_reclaim;
+       u64                     nr_background_reclaim;
+
        unsigned long           last_flushed;
        struct journal_entry_pin *flush_in_progress;
        wait_queue_head_t       pin_flush_wait;
index e858e2a35f8d3d84612bafd85b97943a63c8cda1..a9775cc84f66332d258169985deed2ae1fc156c5 100644 (file)
@@ -345,7 +345,7 @@ int bch2_copygc_start(struct bch_fs *c)
        if (bch2_fs_init_fault("copygc_start"))
                return -ENOMEM;
 
-       t = kthread_create(bch2_copygc_thread, c, "bch_copygc");
+       t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name);
        if (IS_ERR(t))
                return PTR_ERR(t);
 
index cce6f58fe60983f8b24e637183dda743ea1b2713..f9a12dd797a5f2c4cbda6bd515da5d488d5c8803 100644 (file)
@@ -314,7 +314,7 @@ int bch2_rebalance_start(struct bch_fs *c)
        if (c->opts.nochanges)
                return 0;
 
-       p = kthread_create(bch2_rebalance_thread, c, "bch_rebalance");
+       p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name);
        if (IS_ERR(p))
                return PTR_ERR(p);
 
index 12ce4a6277466a71a32f818c572346a15845ed51..98a875e08e9a960b716efeffa2c241ed39dab34e 100644 (file)
@@ -49,7 +49,6 @@
 #include <linux/debugfs.h>
 #include <linux/device.h>
 #include <linux/idr.h>
-#include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/random.h>
@@ -266,7 +265,7 @@ static void bch2_writes_disabled(struct percpu_ref *writes)
 void bch2_fs_read_only(struct bch_fs *c)
 {
        if (!test_bit(BCH_FS_RW, &c->flags)) {
-               cancel_delayed_work_sync(&c->journal.reclaim_work);
+               BUG_ON(c->journal.reclaim_thread);
                return;
        }
 
@@ -424,6 +423,12 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
 
        set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
 
+       ret = bch2_journal_reclaim_start(&c->journal);
+       if (ret) {
+               bch_err(c, "error starting journal reclaim: %i", ret);
+               return ret;
+       }
+
        if (!early) {
                ret = bch2_fs_read_write_late(c);
                if (ret)
@@ -432,9 +437,6 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
 
        percpu_ref_reinit(&c->writes);
        set_bit(BCH_FS_RW, &c->flags);
-
-       queue_delayed_work(c->journal_reclaim_wq,
-                          &c->journal.reclaim_work, 0);
        return 0;
 err:
        __bch2_fs_read_only(c);
@@ -503,8 +505,6 @@ static void __bch2_fs_free(struct bch_fs *c)
        kfree(c->unused_inode_hints);
        free_heap(&c->copygc_heap);
 
-       if (c->journal_reclaim_wq)
-               destroy_workqueue(c->journal_reclaim_wq);
        if (c->copygc_wq)
                destroy_workqueue(c->copygc_wq);
        if (c->wq)
@@ -758,8 +758,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
                                WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
            !(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
                                WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
-           !(c->journal_reclaim_wq = alloc_workqueue("bcachefs_journal_reclaim",
-                               WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
            percpu_ref_init(&c->writes, bch2_writes_disabled,
                            PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
            mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||