/* END CSTYLED */
DEFINE_DELAY_MINTIME_EVENT(zfs_delay__mintime);
+/*
+ * Event class for tracepoints that take three uint64_t arguments:
+ *
+ *	long_free_dirty_all_txgs, chunk_len, txg
+ *
+ * Each argument is copied verbatim into the trace entry and printed as
+ * "long_free_dirty_all_txgs %llu chunk_len %llu txg %llu".
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_free_long_range_class,
+ TP_PROTO(uint64_t long_free_dirty_all_txgs, uint64_t chunk_len, \
+ uint64_t txg),
+ TP_ARGS(long_free_dirty_all_txgs, chunk_len, txg),
+ TP_STRUCT__entry(
+ __field(uint64_t, long_free_dirty_all_txgs)
+ __field(uint64_t, chunk_len)
+ __field(uint64_t, txg)
+ ),
+ TP_fast_assign(
+ __entry->long_free_dirty_all_txgs = long_free_dirty_all_txgs;
+ __entry->chunk_len = chunk_len;
+ __entry->txg = txg;
+ ),
+ TP_printk("long_free_dirty_all_txgs %llu chunk_len %llu txg %llu",
+ __entry->long_free_dirty_all_txgs,
+ __entry->chunk_len, __entry->txg)
+);
+/* END CSTYLED */
+
+/*
+ * Define a tracepoint called `name` as an instance of
+ * zfs_free_long_range_class, using the same three uint64_t arguments
+ * (long_free_dirty_all_txgs, chunk_len, txg) as the class itself.
+ * The macro is used once below to create the zfs_free__long__range event.
+ */
+/* BEGIN CSTYLED */
+#define DEFINE_FREE_LONG_RANGE_EVENT(name) \
+DEFINE_EVENT(zfs_free_long_range_class, name, \
+ TP_PROTO(uint64_t long_free_dirty_all_txgs, \
+ uint64_t chunk_len, uint64_t txg), \
+ TP_ARGS(long_free_dirty_all_txgs, chunk_len, txg))
+/* END CSTYLED */
+DEFINE_FREE_LONG_RANGE_EVENT(zfs_free__long__range);
+
#endif /* _TRACE_DMU_H */
#undef TRACE_INCLUDE_PATH
#include <sys/sa.h>
#include <sys/zfeature.h>
#include <sys/abd.h>
+#include <sys/trace_dmu.h>
#ifdef _KERNEL
#include <sys/vmsystm.h>
#include <sys/zfs_znode.h>
*/
int zfs_nopwrite_enabled = 1;
+/*
+ * Tunable to control the percentage of dirtied blocks from frees in one TXG,
+ * expressed as a percentage of zfs_dirty_data_max.
+ * After this threshold is crossed, additional dirty blocks from frees
+ * wait until the next TXG.
+ * A value of zero will disable this throttle.
+ * A value greater than 100 is not used as a percentage; the threshold is
+ * then set to one quarter of zfs_dirty_data_max instead.
+ */
+uint32_t zfs_per_txg_dirty_frees_percent = 30;
+
const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
{ DMU_BSWAP_UINT8, TRUE, "unallocated" },
{ DMU_BSWAP_ZAP, TRUE, "object directory" },
{
uint64_t object_size;
int err;
+ uint64_t dirty_frees_threshold;
+ dsl_pool_t *dp = dmu_objset_pool(os);
+ int t;
if (dn == NULL)
return (SET_ERROR(EINVAL));
if (offset >= object_size)
return (0);
+ if (zfs_per_txg_dirty_frees_percent <= 100)
+ dirty_frees_threshold =
+ zfs_per_txg_dirty_frees_percent * zfs_dirty_data_max / 100;
+ else
+ dirty_frees_threshold = zfs_dirty_data_max / 4;
+
if (length == DMU_OBJECT_END || offset + length > object_size)
length = object_size - offset;
while (length != 0) {
- uint64_t chunk_end, chunk_begin;
+ uint64_t chunk_end, chunk_begin, chunk_len;
+ uint64_t long_free_dirty_all_txgs = 0;
dmu_tx_t *tx;
if (dmu_objset_zfs_unmounting(dn->dn_objset))
ASSERT3U(chunk_begin, >=, offset);
ASSERT3U(chunk_begin, <=, chunk_end);
+ chunk_len = chunk_end - chunk_begin;
+
+ mutex_enter(&dp->dp_lock);
+ for (t = 0; t < TXG_SIZE; t++) {
+ long_free_dirty_all_txgs +=
+ dp->dp_long_free_dirty_pertxg[t];
+ }
+ mutex_exit(&dp->dp_lock);
+
+ /*
+ * To avoid filling up a TXG with just frees, wait for
+ * the next TXG to open before freeing more chunks if
+ * we have reached the threshold of frees.
+ */
+ if (dirty_frees_threshold != 0 &&
+ long_free_dirty_all_txgs >= dirty_frees_threshold) {
+ txg_wait_open(dp, 0);
+ continue;
+ }
+
tx = dmu_tx_create(os);
- dmu_tx_hold_free(tx, dn->dn_object,
- chunk_begin, chunk_end - chunk_begin);
+ dmu_tx_hold_free(tx, dn->dn_object, chunk_begin, chunk_len);
/*
* Mark this transaction as typically resulting in a net
dmu_tx_abort(tx);
return (err);
}
- dnode_free_range(dn, chunk_begin, chunk_end - chunk_begin, tx);
+
+ mutex_enter(&dp->dp_lock);
+ dp->dp_long_free_dirty_pertxg[dmu_tx_get_txg(tx) & TXG_MASK] +=
+ chunk_len;
+ mutex_exit(&dp->dp_lock);
+ DTRACE_PROBE3(free__long__range,
+ uint64_t, long_free_dirty_all_txgs, uint64_t, chunk_len,
+ uint64_t, dmu_tx_get_txg(tx));
+ dnode_free_range(dn, chunk_begin, chunk_len, tx);
dmu_tx_commit(tx);
- length -= chunk_end - chunk_begin;
+ length -= chunk_len;
}
return (0);
}
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/dsl_pool.h>
*/
dsl_pool_undirty_space(dp, dp->dp_dirty_pertxg[txg & TXG_MASK], txg);
+ /*
+ * Update the long range free counter after
+ * we're done syncing user data
+ */
+ mutex_enter(&dp->dp_lock);
+ ASSERT(spa_sync_pass(dp->dp_spa) == 1 ||
+ dp->dp_long_free_dirty_pertxg[txg & TXG_MASK] == 0);
+ dp->dp_long_free_dirty_pertxg[txg & TXG_MASK] = 0;
+ mutex_exit(&dp->dp_lock);
+
/*
* After the data blocks have been written (ensured by the zio_wait()
* above), update the user/group space accounting.