while (ja->nr < nr) {
struct open_bucket *ob = NULL;
+ unsigned pos;
long bucket;
if (new_fs) {
preempt_disable();
}
- __array_insert_item(ja->buckets, ja->nr, ja->last_idx);
- __array_insert_item(ja->bucket_seq, ja->nr, ja->last_idx);
- __array_insert_item(journal_buckets->buckets, ja->nr, ja->last_idx);
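+ /*
+ * Insert the new bucket right after the one we're currently writing
+ * to, so it's the next bucket the journal will use:
+ */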
+ pos = ja->nr ? (ja->cur_idx + 1) % ja->nr : 0;
+ __array_insert_item(ja->buckets, ja->nr, pos);
+ __array_insert_item(ja->bucket_seq, ja->nr, pos);
+ __array_insert_item(journal_buckets->buckets, ja->nr, pos);
+ ja->nr++;
- ja->buckets[ja->last_idx] = bucket;
- ja->bucket_seq[ja->last_idx] = 0;
- journal_buckets->buckets[ja->last_idx] = cpu_to_le64(bucket);
+ ja->buckets[pos] = bucket;
+ ja->bucket_seq[pos] = 0;
+ journal_buckets->buckets[pos] = cpu_to_le64(bucket);
- if (ja->last_idx < ja->nr) {
- if (ja->cur_idx >= ja->last_idx)
- ja->cur_idx++;
- ja->last_idx++;
- }
- ja->nr++;
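+ /*
+ * Inserting at pos shifted every bucket at index >= pos up by one;
+ * bump any index that pointed at or past the insertion point so it
+ * still refers to the same bucket:
+ */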
+ if (pos <= ja->discard_idx)
+ ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
+ if (pos <= ja->dirty_idx_ondisk)
+ ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
+ if (pos <= ja->dirty_idx)
+ ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
+ if (pos <= ja->cur_idx)
+ ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
ca->mi.bucket_size,
mutex_init(&j->blacklist_lock);
INIT_LIST_HEAD(&j->seq_blacklist);
mutex_init(&j->reclaim_lock);
+ mutex_init(&j->discard_lock);
lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
"dev %u:\n"
"\tnr\t\t%u\n"
"\tavailable\t%u:%u\n"
- "\tcur_idx\t\t%u (seq %llu)\n"
- "\tlast_idx\t%u (seq %llu)\n",
+ "\tdiscard_idx\t\t%u\n"
+ "\tdirty_idx_ondisk\t%u (seq %llu)\n"
+ "\tdirty_idx\t\t%u (seq %llu)\n"
+ "\tcur_idx\t\t%u (seq %llu)\n",
iter, ja->nr,
bch2_journal_dev_buckets_available(j, ja),
ja->sectors_free,
- ja->cur_idx, ja->bucket_seq[ja->cur_idx],
- ja->last_idx, ja->bucket_seq[ja->last_idx]);
+ ja->discard_idx,
+ ja->dirty_idx_ondisk, ja->bucket_seq[ja->dirty_idx_ondisk],
+ ja->dirty_idx, ja->bucket_seq[ja->dirty_idx],
+ ja->cur_idx, ja->bucket_seq[ja->cur_idx]);
}
spin_unlock(&j->lock);
ja->sectors_free = 0;
/*
- * Set last_idx to indicate the entire journal is full and needs to be
+ * Set dirty_idx to indicate the entire journal is full and needs to be
* reclaimed - journal reclaim will immediately reclaim whatever isn't
* pinned when it first runs:
*/
- ja->last_idx = (ja->cur_idx + 1) % ja->nr;
+ ja->discard_idx = ja->dirty_idx_ondisk =
+ ja->dirty_idx = (ja->cur_idx + 1) % ja->nr;
out:
kvpfree(buf.data, buf.size);
percpu_ref_put(&ca->io_ref);
goto err;
spin_lock(&j->lock);
- j->seq_ondisk = seq;
- j->last_seq_ondisk = last_seq;
-
if (seq >= j->pin.front)
journal_seq_pin(j, seq)->devs = devs;
+ j->seq_ondisk = seq;
+ j->last_seq_ondisk = last_seq;
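+ /* the on-disk journal state just advanced; recalculate free space: */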
+ bch2_journal_space_available(j);
+
/*
* Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard
* more buckets:
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
unsigned next = (ja->cur_idx + 1) % ja->nr;
- unsigned available = (ja->last_idx + ja->nr - next) % ja->nr;
+ unsigned available = (ja->discard_idx + ja->nr - next) % ja->nr;
/*
* Allocator startup needs some journal space before we can do journal
* replay:
*/
- if (available &&
- test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags))
- available--;
+ if (available && test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags))
+ --available;
/*
* Don't use the last bucket unless writing the new last_seq
* will make another bucket available:
*/
- if (available &&
- journal_last_seq(j) <= ja->bucket_seq[ja->last_idx])
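+ /*
+ * i.e. writing out the new last_seq only frees up a bucket if
+ * dirty_idx_ondisk is behind dirty_idx:
+ */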
+ if (available && ja->dirty_idx_ondisk == ja->dirty_idx)
--available;
return available;
for_each_member_device_rcu(ca, c, i,
&c->rw_devs[BCH_DATA_JOURNAL]) {
struct journal_device *ja = &ca->journal;
- unsigned buckets_this_device, sectors_this_device;
if (!ja->nr)
continue;
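+ /*
+ * A bucket is no longer dirty in memory once every entry it holds is
+ * older than journal_last_seq(), and no longer dirty on disk once
+ * they're all older than the last_seq we've actually written out:
+ */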
+ while (ja->dirty_idx != ja->cur_idx &&
+ ja->bucket_seq[ja->dirty_idx] < journal_last_seq(j))
+ ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
+
+ while (ja->dirty_idx_ondisk != ja->dirty_idx &&
+ ja->bucket_seq[ja->dirty_idx_ondisk] < j->last_seq_ondisk)
+ ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
+
nr_online++;
+ }
+
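+ /*
+ * We can't write journal entries unless enough devices with journal
+ * buckets are online to satisfy metadata_replicas_required:
+ */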
+ if (nr_online < c->opts.metadata_replicas_required) {
+ ret = -EROFS;
+ sectors_next_entry = 0;
+ goto out;
+ }
+
+ for_each_member_device_rcu(ca, c, i,
+ &c->rw_devs[BCH_DATA_JOURNAL]) {
+ struct journal_device *ja = &ca->journal;
+ unsigned buckets_this_device, sectors_this_device;
+
+ if (!ja->nr)
+ continue;
buckets_this_device = bch2_journal_dev_buckets_available(j, ja);
sectors_this_device = ja->sectors_free;
nr_devs++;
}
- rcu_read_unlock();
- if (nr_online < c->opts.metadata_replicas_required) {
- ret = -EROFS;
- sectors_next_entry = 0;
- } else if (!sectors_next_entry ||
- nr_devs < min_t(unsigned, nr_online,
- c->opts.metadata_replicas)) {
+ if (!sectors_next_entry ||
+ nr_devs < min_t(unsigned, nr_online, c->opts.metadata_replicas)) {
ret = -ENOSPC;
sectors_next_entry = 0;
} else if (!fifo_free(&j->pin)) {
ret = -ENOSPC;
sectors_next_entry = 0;
}
+out:
+ rcu_read_unlock();
j->cur_entry_sectors = sectors_next_entry;
j->cur_entry_error = ret;
bool ret;
spin_lock(&j->lock);
- ret = ja->nr &&
- ja->last_idx != ja->cur_idx &&
- ja->bucket_seq[ja->last_idx] < j->last_seq_ondisk;
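+ /*
+ * true if there are buckets the on-disk journal no longer needs that
+ * haven't been discarded yet:
+ */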
+ ret = ja->discard_idx != ja->dirty_idx_ondisk;
spin_unlock(&j->lock);
return ret;
}
/*
- * Advance ja->last_idx as long as it points to buckets that are no longer
+ * Advance ja->discard_idx as long as it points to buckets that are no longer
* dirty, issuing discards if necessary:
*/
-static void journal_do_discards(struct journal *j)
+static void bch2_journal_do_discards(struct journal *j)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_dev *ca;
unsigned iter;
- mutex_lock(&j->reclaim_lock);
+ mutex_lock(&j->discard_lock);
for_each_rw_member(ca, c, iter) {
struct journal_device *ja = &ca->journal;
bdev_max_discard_sectors(ca->disk_sb.bdev))
blkdev_issue_discard(ca->disk_sb.bdev,
bucket_to_sector(ca,
- ja->buckets[ja->last_idx]),
+ ja->buckets[ja->discard_idx]),
ca->mi.bucket_size, GFP_NOIO);
spin_lock(&j->lock);
- ja->last_idx = (ja->last_idx + 1) % ja->nr;
+ ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
bch2_journal_space_available(j);
spin_unlock(&j->lock);
}
}
- mutex_unlock(&j->reclaim_lock);
+ mutex_unlock(&j->discard_lock);
}
/*
unsigned iter, bucket_to_flush, min_nr = 0;
u64 seq_to_flush = 0;
- journal_do_discards(j);
+ bch2_journal_do_discards(j);
mutex_lock(&j->reclaim_lock);
spin_lock(&j->lock);
struct journal_entry_pin_list *data;
} pin;
- struct journal_entry_pin *flush_in_progress;
- wait_queue_head_t pin_flush_wait;
-
u64 replay_journal_seq;
struct mutex blacklist_lock;
spinlock_t err_lock;
struct delayed_work reclaim_work;
+ struct mutex reclaim_lock;
unsigned long last_flushed;
+ struct journal_entry_pin *flush_in_progress;
+ wait_queue_head_t pin_flush_wait;
- /* protects advancing ja->last_idx: */
- struct mutex reclaim_lock;
+ /* protects advancing ja->discard_idx: */
+ struct mutex discard_lock;
unsigned write_delay_ms;
unsigned reclaim_delay_ms;
unsigned sectors_free;
- /* Journal bucket we're currently writing to */
- unsigned cur_idx;
-
- /* Last journal bucket that still contains an open journal entry */
-
/*
- * j->lock and j->reclaim_lock must both be held to modify, j->lock
- * sufficient to read:
+ * discard_idx <= dirty_idx_ondisk <= dirty_idx <= cur_idx, in ring order
+ * (the indices wrap around ja->nr):
*/
- unsigned last_idx;
+ unsigned discard_idx; /* Next bucket to discard */
+ unsigned dirty_idx_ondisk; /* Oldest bucket still dirty per the on-disk last_seq */
+ unsigned dirty_idx; /* Oldest bucket still dirty in memory */
+ unsigned cur_idx; /* Journal bucket we're currently writing to */
unsigned nr;
+
u64 *buckets;
/* Bio for journal reads/writes to this device */