kstat_named_t dmu_tx_dirty_throttle;
kstat_named_t dmu_tx_dirty_delay;
kstat_named_t dmu_tx_dirty_over_max;
+ kstat_named_t dmu_tx_wrlog_over_max;
kstat_named_t dmu_tx_dirty_frees_delay;
kstat_named_t dmu_tx_quota;
} dmu_tx_stats_t;
#include <sys/rrwlock.h>
#include <sys/dsl_synctask.h>
#include <sys/mmp.h>
+#include <sys/aggsum.h>
#ifdef __cplusplus
extern "C" {
extern unsigned long zfs_dirty_data_max;
extern unsigned long zfs_dirty_data_max_max;
+extern unsigned long zfs_wrlog_data_max;
extern int zfs_dirty_data_sync_percent;
extern int zfs_dirty_data_max_percent;
extern int zfs_dirty_data_max_max_percent;
uint64_t dp_mos_compressed_delta;
uint64_t dp_mos_uncompressed_delta;
+ aggsum_t dp_wrlog_pertxg[TXG_SIZE];
+ aggsum_t dp_wrlog_total;
+
/*
* Time of most recently scheduled (furthest in the future)
* wakeup for delayed transactions.
uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, zfs_space_check_t slop_policy);
uint64_t dsl_pool_unreserved_space(dsl_pool_t *dp,
zfs_space_check_t slop_policy);
+void dsl_pool_wrlog_count(dsl_pool_t *dp, int64_t size, uint64_t txg);
+boolean_t dsl_pool_wrlog_over_max(dsl_pool_t *dp);
void dsl_pool_dirty_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
void dsl_pool_undirty_space(dsl_pool_t *dp, int64_t space, uint64_t txg);
void dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp);
This should be less than
.Sy zfs_vdev_async_write_active_min_dirty_percent .
.
+.It Sy zfs_wrlog_data_max Ns = Pq ulong
+The upper limit of write-transaction ZIL log data size in bytes.
+Once it is reached, write operations are blocked until log data is cleared out
+after transaction group sync.
+Because of some overhead, it should be set to at least twice the size of
+.Sy zfs_dirty_data_max
+.No to prevent harming normal write throughput.
+It should also be smaller than the size of the slog device, if a slog is present.
+.Pp
+Defaults to
+.Sy zfs_dirty_data_max*2
+.
.It Sy zfs_fallocate_reserve_percent Ns = Ns Sy 110 Ns % Pq uint
Since ZFS is a copy-on-write filesystem with snapshots, blocks cannot be
preallocated for a file in order to guarantee that later writes will not
zfs_dirty_data_max = MIN(zfs_dirty_data_max,
zfs_dirty_data_max_max);
}
+
+ if (zfs_wrlog_data_max == 0) {
+
+ /*
+ * dp_wrlog_total is reduced for each txg at the end of
+ * spa_sync(). However, dp_dirty_total is reduced every time
+ * a block is written out. Thus under normal operation,
+ * dp_wrlog_total could grow 2 times as big as
+ * zfs_dirty_data_max.
+ */
+ zfs_wrlog_data_max = zfs_dirty_data_max * 2;
+ }
}
void
{ "dmu_tx_dirty_throttle", KSTAT_DATA_UINT64 },
{ "dmu_tx_dirty_delay", KSTAT_DATA_UINT64 },
{ "dmu_tx_dirty_over_max", KSTAT_DATA_UINT64 },
+ { "dmu_tx_wrlog_over_max", KSTAT_DATA_UINT64 },
{ "dmu_tx_dirty_frees_delay", KSTAT_DATA_UINT64 },
{ "dmu_tx_quota", KSTAT_DATA_UINT64 },
};
return (SET_ERROR(ERESTART));
}
+ if (!tx->tx_dirty_delayed &&
+ dsl_pool_wrlog_over_max(tx->tx_pool)) {
+ DMU_TX_STAT_BUMP(dmu_tx_wrlog_over_max);
+ return (SET_ERROR(ERESTART));
+ }
+
if (!tx->tx_dirty_delayed &&
dsl_pool_need_dirty_delay(tx->tx_pool)) {
tx->tx_wait_dirty = B_TRUE;
int zfs_dirty_data_max_percent = 10;
int zfs_dirty_data_max_max_percent = 25;
+/*
+ * zfs_wrlog_data_max, the upper limit of TX_WRITE log data.
+ * Once it is reached, write operations are blocked
+ * until log data is cleared out after txg sync.
+ * It only counts TX_WRITE log with WR_COPIED or WR_NEED_COPY.
+ */
+unsigned long zfs_wrlog_data_max = 0;
+
/*
* If there's at least this much dirty data (as a percentage of
* zfs_dirty_data_max), push out a txg. This should be less than
mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL);
+ aggsum_init(&dp->dp_wrlog_total, 0);
+ for (int i = 0; i < TXG_SIZE; i++) {
+ aggsum_init(&dp->dp_wrlog_pertxg[i], 0);
+ }
+
dp->dp_zrele_taskq = taskq_create("z_zrele", 100, defclsyspri,
boot_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC |
TASKQ_THREADS_CPU_PCT);
rrw_destroy(&dp->dp_config_rwlock);
mutex_destroy(&dp->dp_lock);
cv_destroy(&dp->dp_spaceavail_cv);
+
+ ASSERT0(aggsum_value(&dp->dp_wrlog_total));
+ aggsum_fini(&dp->dp_wrlog_total);
+ for (int i = 0; i < TXG_SIZE; i++) {
+ ASSERT0(aggsum_value(&dp->dp_wrlog_pertxg[i]));
+ aggsum_fini(&dp->dp_wrlog_pertxg[i]);
+ }
+
taskq_destroy(dp->dp_unlinked_drain_taskq);
taskq_destroy(dp->dp_zrele_taskq);
if (dp->dp_blkstats != NULL) {
cv_signal(&dp->dp_spaceavail_cv);
}
+/*
+ * Account for 'size' bytes of TX_WRITE zil log data charged to 'txg'.
+ * The bytes are added both to the per-txg aggsum slot (txg & TXG_MASK)
+ * and to the pool-wide dp_wrlog_total.  If this txg's accumulated log
+ * data grows slightly past the minimum dirty-sync threshold, kick off
+ * a txg sync early so the log data can be cleared out sooner
+ * (see dsl_pool_wrlog_clear()).
+ */
+void
+dsl_pool_wrlog_count(dsl_pool_t *dp, int64_t size, uint64_t txg)
+{
+ ASSERT3S(size, >=, 0);
+
+ aggsum_add(&dp->dp_wrlog_pertxg[txg & TXG_MASK], size);
+ aggsum_add(&dp->dp_wrlog_total, size);
+
+ /* Choose a value slightly bigger than min dirty sync bytes */
+ uint64_t sync_min =
+ zfs_dirty_data_max * (zfs_dirty_data_sync_percent + 10) / 100;
+ if (aggsum_compare(&dp->dp_wrlog_pertxg[txg & TXG_MASK], sync_min) > 0)
+ txg_kick(dp, txg);
+}
+
+/*
+ * Return B_TRUE when the pool-wide outstanding wrlog data exceeds the
+ * zfs_wrlog_data_max limit.  Callers in dmu_tx assignment use this to
+ * return ERESTART (blocking further write transactions) until the log
+ * data is cleared out after txg sync.
+ */
+boolean_t
+dsl_pool_wrlog_over_max(dsl_pool_t *dp)
+{
+ return (aggsum_compare(&dp->dp_wrlog_total, zfs_wrlog_data_max) > 0);
+}
+
+/*
+ * Reset the wrlog accounting for a txg that has finished syncing:
+ * read that txg's per-txg aggsum, then subtract that amount from both
+ * the per-txg slot (zeroing it) and the pool-wide total.
+ * NOTE(review): the read-then-add is not a single atomic operation;
+ * presumably no new wrlog is charged to this txg at this point in
+ * sync — confirm against the caller in dsl_pool_sync().
+ */
+static void
+dsl_pool_wrlog_clear(dsl_pool_t *dp, uint64_t txg)
+{
+ int64_t delta;
+ delta = -(int64_t)aggsum_value(&dp->dp_wrlog_pertxg[txg & TXG_MASK]);
+ aggsum_add(&dp->dp_wrlog_pertxg[txg & TXG_MASK], delta);
+ aggsum_add(&dp->dp_wrlog_total, delta);
+}
+
#ifdef ZFS_DEBUG
static boolean_t
dsl_early_sync_task_verify(dsl_pool_t *dp, uint64_t txg)
ASSERT(!dmu_objset_is_dirty(zilog->zl_os, txg));
dmu_buf_rele(ds->ds_dbuf, zilog);
}
+
+ dsl_pool_wrlog_clear(dp, txg);
+
ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg));
}
ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max, ULONG, ZMOD_RW,
"Determines the dirty space limit");
+ZFS_MODULE_PARAM(zfs, zfs_, wrlog_data_max, ULONG, ZMOD_RW,
+ "The size limit of write-transaction zil log data");
+
/* zfs_dirty_data_max_max only applied at module load in arc_init(). */
ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max_max, ULONG, ZMOD_RD,
"zfs_dirty_data_max upper bound in bytes");
itx_wr_state_t write_state;
uintptr_t fsync_cnt;
uint64_t gen = 0;
+ ssize_t size = resid;
if (zil_replaying(zilog, tx) || zp->z_unlinked ||
zfs_xattr_owner_unlinked(zp)) {
off += len;
resid -= len;
}
+
+ if (write_state == WR_COPIED || write_state == WR_NEED_COPY) {
+ dsl_pool_wrlog_count(zilog->zl_dmu_pool, size, tx->tx_txg);
+ }
}
/*
#include <sys/zfs_rlock.h>
#include <sys/spa_impl.h>
#include <sys/zvol.h>
-
#include <sys/zvol_impl.h>
-
unsigned int zvol_inhibit_dev = 0;
unsigned int zvol_volmode = ZFS_VOLMODE_GEOM;
uint32_t blocksize = zv->zv_volblocksize;
zilog_t *zilog = zv->zv_zilog;
itx_wr_state_t write_state;
+ uint64_t sz = size;
if (zil_replaying(zilog, tx))
return;
offset += len;
size -= len;
}
+
+ if (write_state == WR_COPIED || write_state == WR_NEED_COPY) {
+ dsl_pool_wrlog_count(zilog->zl_dmu_pool, sz, tx->tx_txg);
+ }
}
/*