git.proxmox.com Git - mirror_ubuntu-kernels.git/commitdiff
bcachefs: ja->discard_idx, ja->dirty_idx
author    Kent Overstreet <kent.overstreet@gmail.com>
Sun, 3 Mar 2019 20:15:55 +0000 (15:15 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:17 +0000 (17:08 -0400)
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/journal.c
fs/bcachefs/journal_io.c
fs/bcachefs/journal_reclaim.c
fs/bcachefs/journal_types.h

index 3b3c342b2df2c1637878ba163ac572ab48c962cb..17add726f2acea37d86224f9f4c15cfa0cabc652 100644 (file)
@@ -760,6 +760,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
 
        while (ja->nr < nr) {
                struct open_bucket *ob = NULL;
+               unsigned pos;
                long bucket;
 
                if (new_fs) {
@@ -786,20 +787,24 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
                        preempt_disable();
                }
 
-               __array_insert_item(ja->buckets,                ja->nr, ja->last_idx);
-               __array_insert_item(ja->bucket_seq,             ja->nr, ja->last_idx);
-               __array_insert_item(journal_buckets->buckets,   ja->nr, ja->last_idx);
+               pos = ja->nr ? (ja->cur_idx + 1) % ja->nr : 0;
+               __array_insert_item(ja->buckets,                ja->nr, pos);
+               __array_insert_item(ja->bucket_seq,             ja->nr, pos);
+               __array_insert_item(journal_buckets->buckets,   ja->nr, pos);
+               ja->nr++;
 
-               ja->buckets[ja->last_idx] = bucket;
-               ja->bucket_seq[ja->last_idx] = 0;
-               journal_buckets->buckets[ja->last_idx] = cpu_to_le64(bucket);
+               ja->buckets[pos] = bucket;
+               ja->bucket_seq[pos] = 0;
+               journal_buckets->buckets[pos] = cpu_to_le64(bucket);
 
-               if (ja->last_idx < ja->nr) {
-                       if (ja->cur_idx >= ja->last_idx)
-                               ja->cur_idx++;
-                       ja->last_idx++;
-               }
-               ja->nr++;
+               if (pos <= ja->discard_idx)
+                       ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
+               if (pos <= ja->dirty_idx_ondisk)
+                       ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
+               if (pos <= ja->dirty_idx)
+                       ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
+               if (pos <= ja->cur_idx)
+                       ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
 
                bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
                                          ca->mi.bucket_size,
@@ -1042,6 +1047,7 @@ int bch2_fs_journal_init(struct journal *j)
        mutex_init(&j->blacklist_lock);
        INIT_LIST_HEAD(&j->seq_blacklist);
        mutex_init(&j->reclaim_lock);
+       mutex_init(&j->discard_lock);
 
        lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
 
@@ -1138,13 +1144,17 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
                       "dev %u:\n"
                       "\tnr\t\t%u\n"
                       "\tavailable\t%u:%u\n"
-                      "\tcur_idx\t\t%u (seq %llu)\n"
-                      "\tlast_idx\t%u (seq %llu)\n",
+                      "\tdiscard_idx\t\t%u\n"
+                      "\tdirty_idx_ondisk\t%u (seq %llu)\n"
+                      "\tdirty_idx\t\t%u (seq %llu)\n"
+                      "\tcur_idx\t\t%u (seq %llu)\n",
                       iter, ja->nr,
                       bch2_journal_dev_buckets_available(j, ja),
                       ja->sectors_free,
-                      ja->cur_idx,     ja->bucket_seq[ja->cur_idx],
-                      ja->last_idx,    ja->bucket_seq[ja->last_idx]);
+                      ja->discard_idx,
+                      ja->dirty_idx_ondisk,    ja->bucket_seq[ja->dirty_idx_ondisk],
+                      ja->dirty_idx,           ja->bucket_seq[ja->dirty_idx],
+                      ja->cur_idx,             ja->bucket_seq[ja->cur_idx]);
        }
 
        spin_unlock(&j->lock);
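The hunks above splice each new bucket into the ring at pos = (cur_idx + 1) % nr (or 0 for an empty ring) and then bump every cursor sitting at or past the insertion point, so the discard_idx <= dirty_idx_ondisk <= dirty_idx <= cur_idx ordering survives the resize. A minimal standalone sketch of that shift rule; toy_ja and toy_insert_bucket are illustrative names, not bcachefs code:

#include <assert.h>

struct toy_ja {
        unsigned discard_idx, dirty_idx_ondisk, dirty_idx, cur_idx, nr;
};

/* Grow the ring by one slot at `pos` and shift the affected cursors. */
static void toy_insert_bucket(struct toy_ja *ja, unsigned pos)
{
        ja->nr++;

        if (pos <= ja->discard_idx)
                ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
        if (pos <= ja->dirty_idx_ondisk)
                ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
        if (pos <= ja->dirty_idx)
                ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
        if (pos <= ja->cur_idx)
                ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
}

int main(void)
{
        /* 4 buckets; discard/dirty_ondisk have wrapped past cur_idx: */
        struct toy_ja ja = { .discard_idx = 3, .dirty_idx_ondisk = 3,
                             .dirty_idx = 1, .cur_idx = 1, .nr = 4 };

        /* New bucket goes in right after cur_idx, i.e. at array slot 2: */
        toy_insert_bucket(&ja, (ja.cur_idx + 1) % ja.nr);

        /* Cursors at or past slot 2 follow their buckets to slot 4... */
        assert(ja.discard_idx == 4 && ja.dirty_idx_ondisk == 4);
        /* ...cursors before the insertion point are untouched: */
        assert(ja.dirty_idx == 1 && ja.cur_idx == 1 && ja.nr == 5);
        return 0;
}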
index d4b82344221c6e4a5b2ebef0a588c6af4df7fca1..b6a51dff09784f683000d45395ae6a2975d7fe77 100644 (file)
@@ -625,11 +625,12 @@ static void bch2_journal_read_device(struct closure *cl)
        ja->sectors_free = 0;
 
        /*
-        * Set last_idx to indicate the entire journal is full and needs to be
+        * Set dirty_idx to indicate the entire journal is full and needs to be
         * reclaimed - journal reclaim will immediately reclaim whatever isn't
         * pinned when it first runs:
         */
-       ja->last_idx = (ja->cur_idx + 1) % ja->nr;
+       ja->discard_idx = ja->dirty_idx_ondisk =
+               ja->dirty_idx = (ja->cur_idx + 1) % ja->nr;
 out:
        kvpfree(buf.data, buf.size);
        percpu_ref_put(&ca->io_ref);
@@ -1069,12 +1070,13 @@ static void journal_write_done(struct closure *cl)
                goto err;
 
        spin_lock(&j->lock);
-       j->seq_ondisk           = seq;
-       j->last_seq_ondisk      = last_seq;
-
        if (seq >= j->pin.front)
                journal_seq_pin(j, seq)->devs = devs;
 
+       j->seq_ondisk           = seq;
+       j->last_seq_ondisk      = last_seq;
+       bch2_journal_space_available(j);
+
        /*
         * Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard
         * more buckets:
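For context on the hunk above: with discard_idx, dirty_idx_ondisk and dirty_idx all set to (cur_idx + 1) % nr after reading a device, the bucket-availability arithmetic in journal_reclaim.c (next file below) comes out to zero, matching the comment that the entire journal is treated as full until reclaim runs. A hedged sketch of just that arithmetic, ignoring the two conditional decrements; toy_buckets_available is an illustrative name:

/* Free buckets between the write cursor and the discard cursor. */
static unsigned toy_buckets_available(unsigned cur_idx, unsigned discard_idx,
                                      unsigned nr)
{
        unsigned next = (cur_idx + 1) % nr;

        return (discard_idx + nr - next) % nr;
}

/*
 * After bch2_journal_read_device() above, discard_idx == (cur_idx + 1) % nr,
 * so toy_buckets_available(cur_idx, (cur_idx + 1) % nr, nr) == 0: the whole
 * journal reads as full until reclaim advances the trailing cursors.
 */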
index 431afeab42b0054079e063a8917348f77b5e8024..3a85fb8b852697006936da5930759aa568a02775 100644 (file)
@@ -14,22 +14,20 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j,
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        unsigned next = (ja->cur_idx + 1) % ja->nr;
-       unsigned available = (ja->last_idx + ja->nr - next) % ja->nr;
+       unsigned available = (ja->discard_idx + ja->nr - next) % ja->nr;
 
        /*
         * Allocator startup needs some journal space before we can do journal
         * replay:
         */
-       if (available &&
-           test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags))
-               available--;
+       if (available && test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags))
+               --available;
 
        /*
         * Don't use the last bucket unless writing the new last_seq
         * will make another bucket available:
         */
-       if (available &&
-           journal_last_seq(j) <= ja->bucket_seq[ja->last_idx])
+       if (available && ja->dirty_idx_ondisk == ja->dirty_idx)
                --available;
 
        return available;
@@ -55,12 +53,34 @@ void bch2_journal_space_available(struct journal *j)
        for_each_member_device_rcu(ca, c, i,
                                   &c->rw_devs[BCH_DATA_JOURNAL]) {
                struct journal_device *ja = &ca->journal;
-               unsigned buckets_this_device, sectors_this_device;
 
                if (!ja->nr)
                        continue;
 
+               while (ja->dirty_idx != ja->cur_idx &&
+                      ja->bucket_seq[ja->dirty_idx] < journal_last_seq(j))
+                       ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
+
+               while (ja->dirty_idx_ondisk != ja->dirty_idx &&
+                      ja->bucket_seq[ja->dirty_idx_ondisk] < j->last_seq_ondisk)
+                       ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
+
                nr_online++;
+       }
+
+       if (nr_online < c->opts.metadata_replicas_required) {
+               ret = -EROFS;
+               sectors_next_entry = 0;
+               goto out;
+       }
+
+       for_each_member_device_rcu(ca, c, i,
+                                  &c->rw_devs[BCH_DATA_JOURNAL]) {
+               struct journal_device *ja = &ca->journal;
+               unsigned buckets_this_device, sectors_this_device;
+
+               if (!ja->nr)
+                       continue;
 
                buckets_this_device = bch2_journal_dev_buckets_available(j, ja);
                sectors_this_device = ja->sectors_free;
@@ -100,20 +120,17 @@ void bch2_journal_space_available(struct journal *j)
 
                nr_devs++;
        }
-       rcu_read_unlock();
 
-       if (nr_online < c->opts.metadata_replicas_required) {
-               ret = -EROFS;
-               sectors_next_entry = 0;
-       } else if (!sectors_next_entry ||
-                  nr_devs < min_t(unsigned, nr_online,
-                                  c->opts.metadata_replicas)) {
+       if (!sectors_next_entry ||
+           nr_devs < min_t(unsigned, nr_online, c->opts.metadata_replicas)) {
                ret = -ENOSPC;
                sectors_next_entry = 0;
        } else if (!fifo_free(&j->pin)) {
                ret = -ENOSPC;
                sectors_next_entry = 0;
        }
+out:
+       rcu_read_unlock();
 
        j->cur_entry_sectors    = sectors_next_entry;
        j->cur_entry_error      = ret;
@@ -129,25 +146,23 @@ static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
        bool ret;
 
        spin_lock(&j->lock);
-       ret = ja->nr &&
-               ja->last_idx != ja->cur_idx &&
-               ja->bucket_seq[ja->last_idx] < j->last_seq_ondisk;
+       ret = ja->discard_idx != ja->dirty_idx_ondisk;
        spin_unlock(&j->lock);
 
        return ret;
 }
 
 /*
- * Advance ja->last_idx as long as it points to buckets that are no longer
+ * Advance ja->discard_idx as long as it points to buckets that are no longer
  * dirty, issuing discards if necessary:
  */
-static void journal_do_discards(struct journal *j)
+static void bch2_journal_do_discards(struct journal *j)
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct bch_dev *ca;
        unsigned iter;
 
-       mutex_lock(&j->reclaim_lock);
+       mutex_lock(&j->discard_lock);
 
        for_each_rw_member(ca, c, iter) {
                struct journal_device *ja = &ca->journal;
@@ -157,18 +172,18 @@ static void journal_do_discards(struct journal *j)
                            bdev_max_discard_sectors(ca->disk_sb.bdev))
                                blkdev_issue_discard(ca->disk_sb.bdev,
                                        bucket_to_sector(ca,
-                                               ja->buckets[ja->last_idx]),
+                                               ja->buckets[ja->discard_idx]),
                                        ca->mi.bucket_size, GFP_NOIO);
 
                        spin_lock(&j->lock);
-                       ja->last_idx = (ja->last_idx + 1) % ja->nr;
+                       ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
 
                        bch2_journal_space_available(j);
                        spin_unlock(&j->lock);
                }
        }
 
-       mutex_unlock(&j->reclaim_lock);
+       mutex_unlock(&j->discard_lock);
 }
 
 /*
@@ -399,7 +414,7 @@ void bch2_journal_reclaim_work(struct work_struct *work)
        unsigned iter, bucket_to_flush, min_nr = 0;
        u64 seq_to_flush = 0;
 
-       journal_do_discards(j);
+       bch2_journal_do_discards(j);
 
        mutex_lock(&j->reclaim_lock);
        spin_lock(&j->lock);
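The two while loops added to bch2_journal_space_available() above follow the same pattern: a trailing cursor chases a leading one around the ring until it either catches up or hits a bucket whose newest entry is still at or above a sequence-number watermark. A hedged standalone sketch of that pattern; advance_cursor is an illustrative helper, not a bcachefs function:

#include <stdint.h>

/*
 * Walk `cursor` forward around a ring of `nr` buckets until it reaches
 * `limit` or a bucket whose newest journal entry (bucket_seq) is still
 * needed, i.e. >= watermark.
 */
static unsigned advance_cursor(const uint64_t *bucket_seq, unsigned nr,
                               unsigned cursor, unsigned limit,
                               uint64_t watermark)
{
        while (cursor != limit && bucket_seq[cursor] < watermark)
                cursor = (cursor + 1) % nr;
        return cursor;
}

/*
 * In the hunks above, dirty_idx chases cur_idx with journal_last_seq(j) as
 * the watermark, then dirty_idx_ondisk chases dirty_idx with
 * j->last_seq_ondisk; should_discard_bucket() then reduces to the index
 * comparison discard_idx != dirty_idx_ondisk.
 */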
index 2f48008820ac870d3311d47fbf0e97dd95c50ada..09b2d22230335cb33e625f3e0f1ce811bff8443c 100644 (file)
@@ -193,9 +193,6 @@ struct journal {
                struct journal_entry_pin_list *data;
        }                       pin;
 
-       struct journal_entry_pin *flush_in_progress;
-       wait_queue_head_t       pin_flush_wait;
-
        u64                     replay_journal_seq;
 
        struct mutex            blacklist_lock;
@@ -206,10 +203,13 @@ struct journal {
        spinlock_t              err_lock;
 
        struct delayed_work     reclaim_work;
+       struct mutex            reclaim_lock;
        unsigned long           last_flushed;
+       struct journal_entry_pin *flush_in_progress;
+       wait_queue_head_t       pin_flush_wait;
 
-       /* protects advancing ja->last_idx: */
-       struct mutex            reclaim_lock;
+       /* protects advancing ja->discard_idx: */
+       struct mutex            discard_lock;
        unsigned                write_delay_ms;
        unsigned                reclaim_delay_ms;
 
@@ -240,17 +240,15 @@ struct journal_device {
 
        unsigned                sectors_free;
 
-       /* Journal bucket we're currently writing to */
-       unsigned                cur_idx;
-
-       /* Last journal bucket that still contains an open journal entry */
-
        /*
-        * j->lock and j->reclaim_lock must both be held to modify, j->lock
-        * sufficient to read:
+        * discard_idx <= dirty_idx_ondisk <= dirty_idx <= cur_idx:
         */
-       unsigned                last_idx;
+       unsigned                discard_idx;            /* Next bucket to discard */
+       unsigned                dirty_idx_ondisk;
+       unsigned                dirty_idx;
+       unsigned                cur_idx;                /* Journal bucket we're currently writing to */
        unsigned                nr;
+
        u64                     *buckets;
 
        /* Bio for journal reads/writes to this device */
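Taken together, the four cursors declared above partition the ring of ja->nr buckets. A hedged illustration of how a bucket index could be classified under the discard_idx <= dirty_idx_ondisk <= dirty_idx <= cur_idx invariant; ring_dist and toy_classify_bucket are illustrative names, not bcachefs code:

enum toy_bucket_state {
        TOY_DISCARDABLE,        /* clean on disk, discard not yet issued */
        TOY_CLEAN_IN_MEMORY,    /* clean, but newer last_seq not yet on disk */
        TOY_DIRTY,              /* still holds pinned journal entries */
        TOY_FREE,               /* discarded, available for new writes */
};

/* Forward distance from `from` to `to` around a ring of size nr. */
static unsigned ring_dist(unsigned from, unsigned to, unsigned nr)
{
        return (to + nr - from) % nr;
}

/* Compare how far `i` sits past discard_idx with each cursor's offset. */
static enum toy_bucket_state
toy_classify_bucket(unsigned i, unsigned discard_idx,
                    unsigned dirty_idx_ondisk, unsigned dirty_idx,
                    unsigned cur_idx, unsigned nr)
{
        unsigned d = ring_dist(discard_idx, i, nr);

        if (d < ring_dist(discard_idx, dirty_idx_ondisk, nr))
                return TOY_DISCARDABLE;
        if (d < ring_dist(discard_idx, dirty_idx, nr))
                return TOY_CLEAN_IN_MEMORY;
        if (d <= ring_dist(discard_idx, cur_idx, nr))
                return TOY_DIRTY;
        return TOY_FREE;
}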