(currently 10 checksums over 10 seconds)
or else the daemon may not trigger any action.
.
-.It Sy zfs_commit_timeout_pct Ns = Ns Sy 5 Ns % Pq uint
+.It Sy zfs_commit_timeout_pct Ns = Ns Sy 10 Ns % Pq uint
This controls the amount of time that a ZIL block (lwb) will remain "open"
when it isn't "full" while it has a thread waiting for it to be committed to
stable storage.
It tunes a tradeoff between additional memory copies and possibly worse log
space efficiency versus additional range lock/unlock activity.
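+.Pp
+As a rough sketch, the commit waiter derives its timeout from the write
+latency of the previous lwb (names as in the module source):
+.Bd -literal -compact
+sleep = zl_last_lwb_latency * zfs_commit_timeout_pct / 100;
+.Ed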
.
-.It Sy zil_min_commit_timeout Ns = Ns Sy 5000 Pq u64
-This sets the minimum delay in nanoseconds ZIL care to delay block commit,
-waiting for more records.
-If ZIL writes are too fast, kernel may not be able sleep for so short interval,
-increasing log latency above allowed by
-.Sy zfs_commit_timeout_pct .
-.
.It Sy zil_nocacheflush Ns = Ns Sy 0 Ns | Ns 1 Pq int
Disable the cache flush commands that are normally sent to disk by
the ZIL after an LWB write has completed.
* committed to stable storage. Please refer to the zil_commit_waiter()
* function (and the comments within it) for more details.
*/
-static uint_t zfs_commit_timeout_pct = 5;
-
-/*
- * Minimal time we care to delay commit waiting for more ZIL records.
- * At least FreeBSD kernel can't sleep for less than 2us at its best.
- * So requests to sleep for less then 5us is a waste of CPU time with
- * a risk of significant log latency increase due to oversleep.
- */
-static uint64_t zil_min_commit_timeout = 5000;
+static uint_t zfs_commit_timeout_pct = 10;
/*
* See zil.h for more information about these fields.
ASSERT(list_is_empty(&zilog->zl_lwb_list));
}
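+
+/*
+ * A burst of ZIL traffic has drained.  If no new itxs queued up behind it
+ * (the commit list is empty), decay zl_parallel, our expectation that the
+ * workload is multi-threaded, and reset zl_cur_used so that the next log
+ * block size prediction starts from scratch.
+ */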
+static void
+zil_burst_done(zilog_t *zilog)
+{
+ if (!list_is_empty(&zilog->zl_itx_commit_list) ||
+ zilog->zl_cur_used == 0)
+ return;
+
+ if (zilog->zl_parallel)
+ zilog->zl_parallel--;
+
+ zilog->zl_cur_used = 0;
+}
+
/*
* This function will traverse the commit list, creating new lwbs as
* needed, and committing the itxs from the commit list to these newly
list_t nolwb_waiters;
lwb_t *lwb, *plwb;
itx_t *itx;
- boolean_t first = B_TRUE;
ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock));
zil_commit_activate_saxattr_feature(zilog);
ASSERT(lwb->lwb_state == LWB_STATE_NEW ||
lwb->lwb_state == LWB_STATE_OPENED);
- first = (lwb->lwb_state == LWB_STATE_NEW) &&
- ((plwb = list_prev(&zilog->zl_lwb_list, lwb)) == NULL ||
- plwb->lwb_state == LWB_STATE_FLUSH_DONE);
+
+ /*
+ * If the lwb is still opened, the workload is genuinely
+ * multi-threaded and we have won a chance at write aggregation.
+ * If it is not opened yet, but the previous lwb is not yet
+ * flushed, the workload is likely still multi-threaded, but the
+ * commits were too far apart to aggregate, so keep trying to
+ * aggregate on later bursts, with lower expectations.
+ */
+ if (lwb->lwb_state == LWB_STATE_OPENED) {
+ zilog->zl_parallel = ZIL_BURSTS;
+ } else if ((plwb = list_prev(&zilog->zl_lwb_list, lwb))
+ != NULL && plwb->lwb_state != LWB_STATE_FLUSH_DONE) {
+ zilog->zl_parallel = MAX(zilog->zl_parallel,
+ ZIL_BURSTS / 2);
+ }
}
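Taken together with zil_burst_done() above, zl_parallel acts as a countdown
of bursts during which the writer keeps lwbs open hoping to aggregate more
itxs: it is recharged to ZIL_BURSTS whenever aggregation is actually
observed, raised to at least ZIL_BURSTS/2 when commits merely overlap, and
decremented once per quiet burst.  The following minimal standalone sketch
models that lifecycle; the value 8 for ZIL_BURSTS and the event helper names
are illustrative assumptions, not part of the patch:

    #include <stdio.h>

    #define ZIL_BURSTS 8            /* assumed value, for illustration only */

    static int zl_parallel;         /* bursts left to wait for aggregation */

    /* Hypothetical stand-ins for the three events in the patch above. */

    static void
    lwb_found_open(void)            /* lwb still OPENED: aggregation won */
    {
            zl_parallel = ZIL_BURSTS;
    }

    static void
    prev_lwb_unflushed(void)        /* previous lwb not yet FLUSH_DONE */
    {
            if (zl_parallel < ZIL_BURSTS / 2)
                    zl_parallel = ZIL_BURSTS / 2;
    }

    static void
    quiet_burst_done(void)          /* zil_burst_done(), commit list empty */
    {
            if (zl_parallel)
                    zl_parallel--;
    }

    int
    main(void)
    {
            lwb_found_open();
            printf("aggregation won:     %d\n", zl_parallel);   /* 8 */
            for (int i = 0; i < 6; i++)
                    quiet_burst_done();
            printf("six quiet bursts:    %d\n", zl_parallel);   /* 2 */
            prev_lwb_unflushed();
            printf("overlapping commits: %d\n", zl_parallel);   /* 4 */
            while (zl_parallel > 0)
                    quiet_burst_done();
            printf("single-threaded:     %d\n", zl_parallel);   /* 0 */
            return (0);
    }

Once the counter decays to zero, the !zilog->zl_parallel test below lets the
writer close and issue each lwb immediately instead of waiting for more
records.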
while ((itx = list_remove_head(&zilog->zl_itx_commit_list)) != NULL) {
* Our lwb is done, leave the rest of the
* itx list to somebody else who cares.
*/
- first = B_FALSE;
+ zilog->zl_parallel = ZIL_BURSTS;
break;
}
} else {
* try and pack as many itxs into as few lwbs as
* possible, without significantly impacting the latency
* of each individual itx.
- *
- * If we had no already running or open LWBs, it can be
- * the workload is single-threaded. And if the ZIL write
- * latency is very small or if the LWB is almost full, it
- * may be cheaper to bypass the delay.
*/
- if (lwb->lwb_state == LWB_STATE_OPENED && first) {
- hrtime_t sleep = zilog->zl_last_lwb_latency *
- zfs_commit_timeout_pct / 100;
- if (sleep < zil_min_commit_timeout ||
- lwb->lwb_nmax - lwb->lwb_nused <
- lwb->lwb_nmax / 8) {
- list_insert_tail(ilwbs, lwb);
- lwb = zil_lwb_write_close(zilog, lwb,
- LWB_STATE_NEW);
- zilog->zl_cur_used = 0;
- if (lwb == NULL) {
- while ((lwb = list_remove_head(ilwbs))
- != NULL)
- zil_lwb_write_issue(zilog, lwb);
- zil_commit_writer_stall(zilog);
- }
+ if (lwb->lwb_state == LWB_STATE_OPENED && !zilog->zl_parallel) {
+ list_insert_tail(ilwbs, lwb);
+ lwb = zil_lwb_write_close(zilog, lwb, LWB_STATE_NEW);
+ zil_burst_done(zilog);
+ if (lwb == NULL) {
+ while ((lwb = list_remove_head(ilwbs)) != NULL)
+ zil_lwb_write_issue(zilog, lwb);
+ zil_commit_writer_stall(zilog);
}
}
}
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_CLOSED);
- /*
- * Since the lwb's zio hadn't been issued by the time this thread
- * reached its timeout, we reset the zilog's "zl_cur_used" field
- * to influence the zil block size selection algorithm.
- *
- * By having to issue the lwb's zio here, it means the size of the
- * lwb was too large, given the incoming throughput of itxs. By
- * setting "zl_cur_used" to zero, we communicate this fact to the
- * block size selection algorithm, so it can take this information
- * into account, and potentially select a smaller size for the
- * next lwb block that is allocated.
- */
- zilog->zl_cur_used = 0;
+ zil_burst_done(zilog);
if (nlwb == NULL) {
/*
ZFS_MODULE_PARAM(zfs, zfs_, commit_timeout_pct, UINT, ZMOD_RW,
"ZIL block open timeout percentage");
-ZFS_MODULE_PARAM(zfs_zil, zil_, min_commit_timeout, U64, ZMOD_RW,
- "Minimum delay we care for ZIL block commit");
-
ZFS_MODULE_PARAM(zfs_zil, zil_, replay_disable, INT, ZMOD_RW,
"Disable intent logging replay");