static int btree_key_cache_flush_pos(struct btree_trans *trans,
struct bkey_cached_key key,
u64 journal_seq,
+ unsigned commit_flags,
bool evict)
{
struct bch_fs *c = trans->c;
BTREE_INSERT_NOUNLOCK|
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL|
- BTREE_INSERT_JOURNAL_RESERVED|
- BTREE_INSERT_JOURNAL_RECLAIM);
+ (ck->journal.seq == journal_last_seq(j)
+ ? BTREE_INSERT_JOURNAL_RESERVED
+ : 0)|
+ commit_flags);
err:
if (ret == -EINTR)
goto retry;
+ if (ret == -EAGAIN)
+ goto out;
+
if (ret) {
bch2_fs_fatal_err_on(!bch2_journal_error(j), c,
"error flushing key cache: %i", ret);
return ret;
}
-static void btree_key_cache_journal_flush(struct journal *j,
- struct journal_entry_pin *pin,
- u64 seq)
+static int btree_key_cache_journal_flush(struct journal *j,
+ struct journal_entry_pin *pin,
+ u64 seq)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bkey_cached *ck =
container_of(pin, struct bkey_cached, journal);
struct bkey_cached_key key;
struct btree_trans trans;
+ int ret = 0;
int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
six_unlock_read(&ck->c.lock);
bch2_trans_init(&trans, c, 0, 0);
- btree_key_cache_flush_pos(&trans, key, seq, false);
+ ret = btree_key_cache_flush_pos(&trans, key, seq,
+ BTREE_INSERT_JOURNAL_RECLAIM, false);
bch2_trans_exit(&trans);
unlock:
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
+
+ return ret;
}
/*
if (!bch2_btree_key_cache_find(c, id, pos))
return 0;
- return btree_key_cache_flush_pos(trans, key, 0, true);
+ return btree_key_cache_flush_pos(trans, key, 0, 0, true);
}
bool bch2_btree_insert_key_cached(struct btree_trans *trans,
struct closure cl;
int disk_res_flags = (flags & BTREE_INSERT_NOFAIL)
? BCH_DISK_RESERVATION_NOFAIL : 0;
- int journal_flags = (flags & BTREE_INSERT_JOURNAL_RESERVED)
- ? JOURNAL_RES_GET_RECLAIM : 0;
+ int journal_flags = 0;
int ret = 0;
+ if (flags & BTREE_INSERT_JOURNAL_RESERVED)
+ journal_flags |= JOURNAL_RES_GET_RESERVED;
+
closure_init_stack(&cl);
retry:
/*
bch2_trans_unlock(trans);
+ if (flags & BTREE_INSERT_JOURNAL_RECLAIM)
+ goto err;
+
ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
BTREE_UPDATE_JOURNAL_RES,
journal_flags);
return true;
}
-static void __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
+static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
unsigned i, u64 seq)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
bch2_btree_node_write_cond(c, b,
(btree_current_write(b) == w && w->journal.seq == seq));
six_unlock_read(&b->c.lock);
+ return 0;
}
/*
 * Thin wrapper: forwards to __btree_node_flush() with i == 0 and returns
 * its result unchanged.
 * NOTE(review): the signature has the shape of journal_pin_flush_fn, so this
 * is presumably registered as a journal pin flush callback, with the 0
 * selecting which of the node's writes the pin belongs to - confirm against
 * __btree_node_flush() and the registration site.
 */
-static void btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
+static int btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
{
return __btree_node_flush(j, pin, 0, seq);
}
/*
 * Thin wrapper: forwards to __btree_node_flush() with i == 1 and returns
 * its result unchanged.
 * NOTE(review): the signature has the shape of journal_pin_flush_fn, so this
 * is presumably registered as a journal pin flush callback, with the 1
 * selecting which of the node's writes the pin belongs to - confirm against
 * __btree_node_flush() and the registration site.
 */
-static void btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
+static int btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
{
return __btree_node_flush(j, pin, 1, seq);
}
ret = bch2_journal_preres_get(&c->journal,
&trans->journal_preres, trans->journal_preres_u64s,
JOURNAL_RES_GET_NONBLOCK|
- ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM)
- ? JOURNAL_RES_GET_RECLAIM : 0));
+ ((trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
+ ? JOURNAL_RES_GET_RESERVED : 0));
if (unlikely(ret == -EAGAIN))
ret = bch2_trans_journal_preres_get_cold(trans,
trans->journal_preres_u64s);
case BTREE_INSERT_NEED_JOURNAL_RES:
bch2_trans_unlock(trans);
+ if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
+ !(trans->flags & BTREE_INSERT_JOURNAL_RESERVED))
+ return -EAGAIN;
+
ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK);
if (ret)
return ret;
#include "btree_gc.h"
#include "btree_update.h"
#include "buckets.h"
+#include "error.h"
#include "journal.h"
#include "journal_io.h"
#include "journal_reclaim.h"
if (!ret)
goto retry;
+ if ((ret == cur_entry_journal_full ||
+ ret == cur_entry_journal_pin_full) &&
+ !can_discard &&
+ j->reservations.idx == j->reservations.unwritten_idx &&
+ (flags & JOURNAL_RES_GET_RESERVED)) {
+ char *journal_debug_buf = kmalloc(4096, GFP_ATOMIC);
+
+ bch_err(c, "Journal stuck!");
+ if (journal_debug_buf) {
+ bch2_journal_debug_to_text(&_PBUF(journal_debug_buf, 4096), j);
+ bch_err(c, "%s", journal_debug_buf);
+
+ bch2_journal_pins_to_text(&_PBUF(journal_debug_buf, 4096), j);
+ bch_err(c, "Journal pins:\n%s", journal_debug_buf);
+ kfree(journal_debug_buf);
+ }
+
+ bch2_fatal_error(c);
+ dump_stack();
+ }
+
/*
* Journal is full - can't rely on reclaim from work item due to
* freezing:
"last_seq_ondisk:\t%llu\n"
"flushed_seq_ondisk:\t%llu\n"
"prereserved:\t\t%u/%u\n"
+ "each entry reserved:\t%u\n"
"nr flush writes:\t%llu\n"
"nr noflush writes:\t%llu\n"
"nr direct reclaim:\t%llu\n"
j->flushed_seq_ondisk,
j->prereserved.reserved,
j->prereserved.remaining,
+ j->entry_u64s_reserved,
j->nr_flush_writes,
j->nr_noflush_writes,
j->nr_direct_reclaim,
#define JOURNAL_RES_GET_NONBLOCK (1 << 0)
#define JOURNAL_RES_GET_CHECK (1 << 1)
#define JOURNAL_RES_GET_RESERVED (1 << 2)
-#define JOURNAL_RES_GET_RECLAIM (1 << 3)
static inline int journal_res_get_fast(struct journal *j,
struct journal_res *res,
* into the reclaim path and deadlock:
*/
- if (!(flags & JOURNAL_RES_GET_RECLAIM) &&
+ if (!(flags & JOURNAL_RES_GET_RESERVED) &&
new.reserved > new.remaining)
return 0;
} while ((v = atomic64_cmpxchg(&j->prereserved.counter,
u64s_remaining = (u64) clean << 6;
u64s_remaining -= (u64) total << 3;
u64s_remaining = max(0LL, u64s_remaining);
- u64s_remaining /= 2;
+ u64s_remaining /= 4;
u64s_remaining = min_t(u64, u64s_remaining, U32_MAX);
out:
j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0;
if (!journal_pin_active(pin))
return;
+ if (j->flush_in_progress == pin)
+ j->flush_in_progress_dropped = true;
+
pin_list = journal_seq_pin(j, pin->seq);
pin->seq = 0;
list_del_init(&pin->list);
struct journal_entry_pin_list *pin_list;
struct journal_entry_pin *ret = NULL;
- if (!test_bit(JOURNAL_RECLAIM_STARTED, &j->flags))
- return NULL;
-
- spin_lock(&j->lock);
-
fifo_for_each_entry_ptr(pin_list, &j->pin, *seq)
if (*seq > max_seq ||
(ret = list_first_entry_or_null(&pin_list->list,
struct journal_entry_pin, list)))
break;
- if (ret) {
- list_move(&ret->list, &pin_list->flushed);
- BUG_ON(j->flush_in_progress);
- j->flush_in_progress = ret;
- }
-
- spin_unlock(&j->lock);
-
return ret;
}
/* returns the number of pins flushed; nonzero if we did work */
-static u64 journal_flush_pins(struct journal *j, u64 seq_to_flush,
- unsigned min_nr)
+static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
+ unsigned min_nr)
{
struct journal_entry_pin *pin;
- u64 seq, ret = 0;
+ size_t nr_flushed = 0;
+ journal_pin_flush_fn flush_fn;
+ u64 seq;
+ int err;
+
+ if (!test_bit(JOURNAL_RECLAIM_STARTED, &j->flags))
+ return 0;
lockdep_assert_held(&j->reclaim_lock);
j->last_flushed = jiffies;
+ spin_lock(&j->lock);
pin = journal_get_next_pin(j, min_nr
? U64_MAX : seq_to_flush, &seq);
+ if (pin) {
+ BUG_ON(j->flush_in_progress);
+ j->flush_in_progress = pin;
+ j->flush_in_progress_dropped = false;
+ flush_fn = pin->flush;
+ }
+ spin_unlock(&j->lock);
+
if (!pin)
break;
if (min_nr)
min_nr--;
- pin->flush(j, pin, seq);
+ err = flush_fn(j, pin, seq);
- BUG_ON(j->flush_in_progress != pin);
+ spin_lock(&j->lock);
+ /* Pin might have been dropped or rearmed: */
+ if (likely(!err && !j->flush_in_progress_dropped))
+ list_move(&pin->list, &journal_seq_pin(j, seq)->flushed);
j->flush_in_progress = NULL;
+ j->flush_in_progress_dropped = false;
+ spin_unlock(&j->lock);
+
wake_up(&j->pin_flush_wait);
- ret++;
+
+ if (err)
+ break;
+
+ nr_flushed++;
}
- return ret;
+ return nr_flushed;
}
static u64 journal_seq_to_flush(struct journal *j)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
bool kthread = (current->flags & PF_KTHREAD) != 0;
- u64 seq_to_flush, nr_flushed = 0;
- size_t min_nr;
+ u64 seq_to_flush;
+ size_t min_nr, nr_flushed;
unsigned flags;
int ret = 0;
struct journal;
struct journal_entry_pin;
-typedef void (*journal_pin_flush_fn)(struct journal *j,
+typedef int (*journal_pin_flush_fn)(struct journal *j,
struct journal_entry_pin *, u64);
struct journal_entry_pin {
unsigned long last_flushed;
struct journal_entry_pin *flush_in_progress;
+ bool flush_in_progress_dropped;
wait_queue_head_t pin_flush_wait;
/* protects advancing ja->discard_idx: */