while (ja->nr < nr) {
struct open_bucket *ob = NULL;
+ unsigned pos;
long bucket;
if (new_fs) {
preempt_disable();
}
- __array_insert_item(ja->buckets, ja->nr, ja->last_idx);
- __array_insert_item(ja->bucket_seq, ja->nr, ja->last_idx);
- __array_insert_item(journal_buckets->buckets, ja->nr, ja->last_idx);
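+ /*
+ * Insert the new bucket right after the one we're currently writing
+ * to, so it's the next bucket the journal will use:
+ */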
+ pos = ja->nr ? (ja->cur_idx + 1) % ja->nr : 0;
+ __array_insert_item(ja->buckets, ja->nr, pos);
+ __array_insert_item(ja->bucket_seq, ja->nr, pos);
+ __array_insert_item(journal_buckets->buckets, ja->nr, pos);
+ ja->nr++;
- ja->buckets[ja->last_idx] = bucket;
- ja->bucket_seq[ja->last_idx] = 0;
- journal_buckets->buckets[ja->last_idx] = cpu_to_le64(bucket);
+ ja->buckets[pos] = bucket;
+ ja->bucket_seq[pos] = 0;
+ journal_buckets->buckets[pos] = cpu_to_le64(bucket);
- if (ja->last_idx < ja->nr) {
- if (ja->cur_idx >= ja->last_idx)
- ja->cur_idx++;
- ja->last_idx++;
- }
- ja->nr++;
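+ /*
+ * Inserting at pos shifted every bucket at index >= pos up by one;
+ * bump any index that pointed at or past the insertion point so it
+ * still refers to the same bucket:
+ */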
+ if (pos <= ja->discard_idx)
+ ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
+ if (pos <= ja->dirty_idx_ondisk)
+ ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
+ if (pos <= ja->dirty_idx)
+ ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
+ if (pos <= ja->cur_idx)
+ ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
ca->mi.bucket_size,
mutex_init(&j->blacklist_lock);
INIT_LIST_HEAD(&j->seq_blacklist);
mutex_init(&j->reclaim_lock);
+ mutex_init(&j->discard_lock);
lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
"dev %u:\n"
"\tnr\t\t%u\n"
"\tavailable\t%u:%u\n"
- "\tcur_idx\t\t%u (seq %llu)\n"
- "\tlast_idx\t%u (seq %llu)\n",
+ "\tdiscard_idx\t\t%u\n"
+ "\tdirty_idx_ondisk\t%u (seq %llu)\n"
+ "\tdirty_idx\t\t%u (seq %llu)\n"
+ "\tcur_idx\t\t%u (seq %llu)\n",
iter, ja->nr,
bch2_journal_dev_buckets_available(j, ja),
ja->sectors_free,
- ja->cur_idx, ja->bucket_seq[ja->cur_idx],
- ja->last_idx, ja->bucket_seq[ja->last_idx]);
+ ja->discard_idx,
+ ja->dirty_idx_ondisk, ja->bucket_seq[ja->dirty_idx_ondisk],
+ ja->dirty_idx, ja->bucket_seq[ja->dirty_idx],
+ ja->cur_idx, ja->bucket_seq[ja->cur_idx]);
}
spin_unlock(&j->lock);
ja->sectors_free = 0;
/*
- * Set last_idx to indicate the entire journal is full and needs to be
+ * Set dirty_idx to indicate the entire journal is full and needs to be
* reclaimed - journal reclaim will immediately reclaim whatever isn't
* pinned when it first runs:
*/
- ja->last_idx = (ja->cur_idx + 1) % ja->nr;
+ ja->discard_idx = ja->dirty_idx_ondisk =
+ ja->dirty_idx = (ja->cur_idx + 1) % ja->nr;
out:
kvpfree(buf.data, buf.size);
percpu_ref_put(&ca->io_ref);
goto err;
spin_lock(&j->lock);
- j->seq_ondisk = seq;
- j->last_seq_ondisk = last_seq;
-
if (seq >= j->pin.front)
journal_seq_pin(j, seq)->devs = devs;
+ j->seq_ondisk = seq;
+ j->last_seq_ondisk = last_seq;
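+ /* the on-disk journal state just advanced; recalculate free space: */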
+ bch2_journal_space_available(j);
+
/*
* Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard
* more buckets:
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
unsigned next = (ja->cur_idx + 1) % ja->nr;
- unsigned available = (ja->last_idx + ja->nr - next) % ja->nr;
+ unsigned available = (ja->discard_idx + ja->nr - next) % ja->nr;
/*
* Allocator startup needs some journal space before we can do journal
* replay:
*/
- if (available &&
- test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags))
- available--;
+ if (available && test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags))
+ --available;
/*
* Don't use the last bucket unless writing the new last_seq
* will make another bucket available:
*/
- if (available &&
- journal_last_seq(j) <= ja->bucket_seq[ja->last_idx])
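+ /*
+ * i.e. writing out the new last_seq only frees up a bucket if
+ * dirty_idx_ondisk is behind dirty_idx:
+ */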
+ if (available && ja->dirty_idx_ondisk == ja->dirty_idx)
--available;
return available;
for_each_member_device_rcu(ca, c, i,
&c->rw_devs[BCH_DATA_JOURNAL]) {
struct journal_device *ja = &ca->journal;
- unsigned buckets_this_device, sectors_this_device;
if (!ja->nr)
continue;
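+ /*
+ * A bucket is no longer dirty in memory once every entry it holds is
+ * older than journal_last_seq(), and no longer dirty on disk once
+ * they're all older than the last_seq we've actually written out:
+ */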
+ while (ja->dirty_idx != ja->cur_idx &&
+ ja->bucket_seq[ja->dirty_idx] < journal_last_seq(j))
+ ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
+
+ while (ja->dirty_idx_ondisk != ja->dirty_idx &&
+ ja->bucket_seq[ja->dirty_idx_ondisk] < j->last_seq_ondisk)
+ ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
+
nr_online++;
+ }
+
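+ /*
+ * We can't write journal entries unless enough devices with journal
+ * buckets are online to satisfy metadata_replicas_required:
+ */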
+ if (nr_online < c->opts.metadata_replicas_required) {
+ ret = -EROFS;
+ sectors_next_entry = 0;
+ goto out;
+ }
+
+ for_each_member_device_rcu(ca, c, i,
+ &c->rw_devs[BCH_DATA_JOURNAL]) {
+ struct journal_device *ja = &ca->journal;
+ unsigned buckets_this_device, sectors_this_device;
+
+ if (!ja->nr)
+ continue;
buckets_this_device = bch2_journal_dev_buckets_available(j, ja);
sectors_this_device = ja->sectors_free;
nr_devs++;
}
- rcu_read_unlock();
- if (nr_online < c->opts.metadata_replicas_required) {
- ret = -EROFS;
- sectors_next_entry = 0;
- } else if (!sectors_next_entry ||
- nr_devs < min_t(unsigned, nr_online,
- c->opts.metadata_replicas)) {
+ if (!sectors_next_entry ||
+ nr_devs < min_t(unsigned, nr_online, c->opts.metadata_replicas)) {
ret = -ENOSPC;
sectors_next_entry = 0;
} else if (!fifo_free(&j->pin)) {
ret = -ENOSPC;
sectors_next_entry = 0;
}
+out:
+ rcu_read_unlock();
j->cur_entry_sectors = sectors_next_entry;
j->cur_entry_error = ret;
bool ret;
spin_lock(&j->lock);
- ret = ja->nr &&
- ja->last_idx != ja->cur_idx &&
- ja->bucket_seq[ja->last_idx] < j->last_seq_ondisk;
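+ /*
+ * true if there are buckets the on-disk journal no longer needs that
+ * haven't been discarded yet:
+ */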
+ ret = ja->discard_idx != ja->dirty_idx_ondisk;
spin_unlock(&j->lock);
return ret;
}
/*
- * Advance ja->last_idx as long as it points to buckets that are no longer
+ * Advance ja->discard_idx as long as it points to buckets that are no longer
* dirty, issuing discards if necessary:
*/
-static void journal_do_discards(struct journal *j)
+static void bch2_journal_do_discards(struct journal *j)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_dev *ca;
unsigned iter;
- mutex_lock(&j->reclaim_lock);
+ mutex_lock(&j->discard_lock);
for_each_rw_member(ca, c, iter) {
struct journal_device *ja = &ca->journal;
bdev_max_discard_sectors(ca->disk_sb.bdev))
blkdev_issue_discard(ca->disk_sb.bdev,
bucket_to_sector(ca,
- ja->buckets[ja->last_idx]),
+ ja->buckets[ja->discard_idx]),
ca->mi.bucket_size, GFP_NOIO);
spin_lock(&j->lock);
- ja->last_idx = (ja->last_idx + 1) % ja->nr;
+ ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
bch2_journal_space_available(j);
spin_unlock(&j->lock);
}
}
- mutex_unlock(&j->reclaim_lock);
+ mutex_unlock(&j->discard_lock);
}
/*
unsigned iter, bucket_to_flush, min_nr = 0;
u64 seq_to_flush = 0;
- journal_do_discards(j);
+ bch2_journal_do_discards(j);
mutex_lock(&j->reclaim_lock);
spin_lock(&j->lock);
struct journal_entry_pin_list *data;
} pin;
- struct journal_entry_pin *flush_in_progress;
- wait_queue_head_t pin_flush_wait;
-
u64 replay_journal_seq;
struct mutex blacklist_lock;
spinlock_t err_lock;
struct delayed_work reclaim_work;
+ struct mutex reclaim_lock;
unsigned long last_flushed;
+ struct journal_entry_pin *flush_in_progress;
+ wait_queue_head_t pin_flush_wait;
- /* protects advancing ja->last_idx: */
- struct mutex reclaim_lock;
+ /* protects advancing ja->discard_idx: */
+ struct mutex discard_lock;
unsigned write_delay_ms;
unsigned reclaim_delay_ms;
unsigned sectors_free;
- /* Journal bucket we're currently writing to */
- unsigned cur_idx;
-
- /* Last journal bucket that still contains an open journal entry */
-
/*
- * j->lock and j->reclaim_lock must both be held to modify, j->lock
- * sufficient to read:
+ * discard_idx <= dirty_idx_ondisk <= dirty_idx <= cur_idx, in ring order
+ * (the indices wrap around ja->nr):
*/
- unsigned last_idx;
+ unsigned discard_idx; /* Next bucket to discard */
+ unsigned dirty_idx_ondisk; /* Oldest bucket still dirty per the on-disk last_seq */
+ unsigned dirty_idx; /* Oldest bucket still dirty in memory */
+ unsigned cur_idx; /* Journal bucket we're currently writing to */
unsigned nr;
+
u64 *buckets;
/* Bio for journal reads/writes to this device */