*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/dsl_pool.h>
#include <sys/zfs_znode.h>
#include <sys/spa_impl.h>
#include <sys/dsl_deadlist.h>
+#include <sys/bptree.h>
+#include <sys/zfeature.h>
/*
 * Global settings.  Those registered with module_param() later in this
 * file are exposed as module parameters.
 */
int zfs_no_write_throttle = 0;
int zfs_write_limit_shift = 3; /* 1/8th of physical memory */
int zfs_txg_synctime_ms = 1000; /* target millisecs to sync a txg */
+int zfs_txg_history = 60; /* statistics for the last N txgs */
unsigned long zfs_write_limit_min = 32 << 20; /* min write limit is 32MB */
unsigned long zfs_write_limit_max = 0; /* max data payload per txg */
static pgcnt_t old_physmem = 0;
+/*
+ * kstat update callback for the per-pool txg history kstat.
+ *
+ * Rebuilds ksp->ks_data as a flat array holding a copy of the th_kstat
+ * member of every entry on dp->dp_txg_history, sized from
+ * dp->dp_txg_history_size.
+ *
+ * Returns EACCES for KSTAT_WRITE requests and 0 otherwise.
+ */
+static int
+dsl_pool_txg_history_update(kstat_t *ksp, int rw)
+{
+	dsl_pool_t *dp = ksp->ks_private;
+	txg_history_t *th;
+	size_t off = 0;
+
+	if (rw == KSTAT_WRITE)
+		return (EACCES);
+
+	/*
+	 * Release the previous snapshot and forget the pointer.  When the
+	 * history is empty no new buffer is allocated below, so a stale
+	 * ks_data would otherwise be freed a second time by the next update
+	 * or by dsl_pool_txg_history_destroy().
+	 */
+	if (ksp->ks_data != NULL) {
+		kmem_free(ksp->ks_data, ksp->ks_data_size);
+		ksp->ks_data = NULL;
+	}
+
+	/* dp_lock keeps the list and its size stable while we copy. */
+	mutex_enter(&dp->dp_lock);
+
+	ksp->ks_ndata = dp->dp_txg_history_size;
+	ksp->ks_data_size = dp->dp_txg_history_size * sizeof (kstat_txg_t);
+	if (ksp->ks_data_size > 0)
+		ksp->ks_data = kmem_alloc(ksp->ks_data_size, KM_PUSHPAGE);
+
+	/* Traversed oldest to youngest for the most readable kstat output */
+	for (th = list_tail(&dp->dp_txg_history); th != NULL;
+	    th = list_prev(&dp->dp_txg_history, th)) {
+		mutex_enter(&th->th_lock);
+		ASSERT3U(off + sizeof (kstat_txg_t), <=, ksp->ks_data_size);
+		memcpy((char *)ksp->ks_data + off, &th->th_kstat,
+		    sizeof (kstat_txg_t));
+		off += sizeof (kstat_txg_t);
+		mutex_exit(&th->th_lock);
+	}
+
+	mutex_exit(&dp->dp_lock);
+
+	return (0);
+}
+
+/*
+ * Set up txg history tracking for a pool: create the history list, record
+ * the first entry for 'txg', and publish a "txgs-<pool name>" kstat whose
+ * contents are produced on demand by dsl_pool_txg_history_update().
+ * The list is created and seeded even if the kstat cannot be created.
+ */
+static void
+dsl_pool_txg_history_init(dsl_pool_t *dp, uint64_t txg)
+{
+	char kstat_name[KSTAT_STRLEN];
+	kstat_t *ksp;
+
+	list_create(&dp->dp_txg_history, sizeof (txg_history_t),
+	    offsetof(txg_history_t, th_link));
+	dsl_pool_txg_history_add(dp, txg);
+
+	(void) snprintf(kstat_name, sizeof (kstat_name), "txgs-%s",
+	    spa_name(dp->dp_spa));
+
+	ksp = kstat_create("zfs", 0, kstat_name, "misc", KSTAT_TYPE_TXG, 0,
+	    KSTAT_FLAG_VIRTUAL);
+	dp->dp_txg_kstat = ksp;
+	if (ksp == NULL)
+		return;
+
+	/* The data buffer is allocated lazily by the update callback. */
+	ksp->ks_data = NULL;
+	ksp->ks_private = dp;
+	ksp->ks_update = dsl_pool_txg_history_update;
+	kstat_install(ksp);
+}
+
+/*
+ * Tear down txg history tracking: release the kstat's snapshot buffer (if
+ * one was allocated) and delete the kstat, then free every history entry
+ * under dp_lock and destroy the list.
+ */
+static void
+dsl_pool_txg_history_destroy(dsl_pool_t *dp)
+{
+	kstat_t *ksp = dp->dp_txg_kstat;
+	txg_history_t *entry;
+
+	if (ksp != NULL) {
+		if (ksp->ks_data != NULL)
+			kmem_free(ksp->ks_data, ksp->ks_data_size);
+
+		kstat_delete(ksp);
+	}
+
+	mutex_enter(&dp->dp_lock);
+	for (entry = list_remove_head(&dp->dp_txg_history); entry != NULL;
+	    entry = list_remove_head(&dp->dp_txg_history)) {
+		dp->dp_txg_history_size--;
+		mutex_destroy(&entry->th_lock);
+		kmem_free(entry, sizeof (txg_history_t));
+	}
+
+	/* Every entry that was counted must have been on the list. */
+	ASSERT3U(dp->dp_txg_history_size, ==, 0);
+	list_destroy(&dp->dp_txg_history);
+	mutex_exit(&dp->dp_lock);
+}
+
+/*
+ * Record a new history entry for 'txg' at the head of the pool's history
+ * list.  The entry starts in TXG_STATE_OPEN with its birth time taken from
+ * gethrtime().  The oldest entries are then trimmed from the tail until at
+ * most zfs_txg_history entries remain.
+ *
+ * Returns the new entry (unlocked), or NULL when zfs_txg_history is zero
+ * or negative and the new entry itself was trimmed.
+ */
+txg_history_t *
+dsl_pool_txg_history_add(dsl_pool_t *dp, uint64_t txg)
+{
+	txg_history_t *th, *rm;
+	int limit;
+
+	th = kmem_zalloc(sizeof (txg_history_t), KM_SLEEP);
+	mutex_init(&th->th_lock, NULL, MUTEX_DEFAULT, NULL);
+	th->th_kstat.txg = txg;
+	th->th_kstat.state = TXG_STATE_OPEN;
+	th->th_kstat.birth = gethrtime();
+
+	mutex_enter(&dp->dp_lock);
+
+	/*
+	 * zfs_txg_history is a writable module parameter.  Sample it once
+	 * and clamp negative values to zero so the trim loop below has a
+	 * stable, sane bound.
+	 */
+	limit = (zfs_txg_history > 0) ? zfs_txg_history : 0;
+
+	list_insert_head(&dp->dp_txg_history, th);
+	dp->dp_txg_history_size++;
+
+	/* Stop on an empty list as well, so a NULL tail is never touched. */
+	while (dp->dp_txg_history_size > limit &&
+	    (rm = list_remove_tail(&dp->dp_txg_history)) != NULL) {
+		dp->dp_txg_history_size--;
+		if (rm == th)
+			th = NULL;	/* never hand back a freed entry */
+		mutex_destroy(&rm->th_lock);
+		kmem_free(rm, sizeof (txg_history_t));
+	}
+
+	mutex_exit(&dp->dp_lock);
+
+	return (th);
+}
+
+/*
+ * Look up the history entry for 'txg'.
+ *
+ * The list is walked from the head (youngest) toward the tail (oldest)
+ * because lookups are only done for open or syncing txgs, which are
+ * guaranteed to be at the head of the list.
+ *
+ * On a match the entry's th_lock is taken before dp_lock is dropped and
+ * the entry is returned locked; release it with
+ * dsl_pool_txg_history_put().  Returns NULL if no entry matches.
+ */
+txg_history_t *
+dsl_pool_txg_history_get(dsl_pool_t *dp, uint64_t txg)
+{
+	txg_history_t *cur;
+
+	mutex_enter(&dp->dp_lock);
+
+	cur = list_head(&dp->dp_txg_history);
+	while (cur != NULL && cur->th_kstat.txg != txg)
+		cur = list_next(&dp->dp_txg_history, cur);
+
+	if (cur != NULL)
+		mutex_enter(&cur->th_lock);
+
+	mutex_exit(&dp->dp_lock);
+
+	return (cur);
+}
+
+/*
+ * Release an entry obtained from dsl_pool_txg_history_get() by dropping
+ * its th_lock.  'th' must not be NULL.
+ */
+void
+dsl_pool_txg_history_put(txg_history_t *th)
+{
+	kmutex_t *entry_lock = &th->th_lock;
+
+	mutex_exit(entry_lock);
+}
+
int
dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp)
{
dp->dp_iput_taskq = taskq_create("zfs_iput_taskq", 1, minclsyspri,
1, 4, 0);
+ dsl_pool_txg_history_init(dp, txg);
+
return (dp);
}
int
-dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
+dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
{
int err;
dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
+
+ err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp,
+ &dp->dp_meta_objset);
+ if (err != 0)
+ dsl_pool_close(dp);
+ else
+ *dpp = dp;
+
+ return (err);
+}
+
+int
+dsl_pool_open(dsl_pool_t *dp)
+{
+ int err;
dsl_dir_t *dd;
dsl_dataset_t *ds;
uint64_t obj;
rw_enter(&dp->dp_config_rwlock, RW_WRITER);
- err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp,
- &dp->dp_meta_objset);
- if (err)
- goto out;
-
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1,
&dp->dp_root_dir_obj);
if (err)
goto out;
- if (spa_version(spa) >= SPA_VERSION_ORIGIN) {
+ if (spa_version(dp->dp_spa) >= SPA_VERSION_ORIGIN) {
err = dsl_pool_open_special_dir(dp, ORIGIN_DIR_NAME, &dd);
if (err)
goto out;
goto out;
}
- if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
+ if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
err = dsl_pool_open_special_dir(dp, FREE_DIR_NAME,
&dp->dp_free_dir);
if (err)
dp->dp_meta_objset, obj));
}
+ if (spa_feature_is_active(dp->dp_spa,
+ &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
+ err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
+ &dp->dp_bptree_obj);
+ if (err != 0)
+ goto out;
+ }
+
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1,
&dp->dp_tmp_userrefs_obj);
if (err)
goto out;
- err = dsl_scan_init(dp, txg);
+ err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg);
out:
rw_exit(&dp->dp_config_rwlock);
- if (err)
- dsl_pool_close(dp);
- else
- *dpp = dp;
-
return (err);
}
arc_flush(dp->dp_spa);
txg_fini(dp);
dsl_scan_fini(dp);
+ dsl_pool_txg_history_destroy(dp);
rw_destroy(&dp->dp_config_rwlock);
mutex_destroy(&dp->dp_lock);
taskq_destroy(dp->dp_iput_taskq);
deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
dsl_deadlist_t *dl = arg;
+ dsl_pool_t *dp = dmu_objset_pool(dl->dl_os);
+ rw_enter(&dp->dp_config_rwlock, RW_READER);
dsl_deadlist_insert(dl, bp, tx);
+ rw_exit(&dp->dp_config_rwlock);
return (0);
}
dsl_pool_sync_context(dsl_pool_t *dp)
{
return (curthread == dp->dp_tx.tx_sync_thread ||
- spa_get_dsl(dp->dp_spa) == NULL);
+ spa_is_initializing(dp->dp_spa));
}
uint64_t
reserved = dp->dp_space_towrite[tx->tx_txg & TXG_MASK]
+ dp->dp_tempreserved[tx->tx_txg & TXG_MASK] / 2;
- if (reserved && reserved > write_limit)
+ if (reserved && reserved > write_limit) {
+ DMU_TX_STAT_BUMP(dmu_tx_write_limit);
return (ERESTART);
+ }
}
atomic_add_64(&dp->dp_tempreserved[tx->tx_txg & TXG_MASK], space);
ASSERT(dp->dp_tmp_userrefs_obj == 0);
ASSERT(dmu_tx_is_syncing(tx));
- dp->dp_tmp_userrefs_obj = zap_create(mos, DMU_OT_USERREFS,
- DMU_OT_NONE, 0, tx);
-
- VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS,
- sizeof (uint64_t), 1, &dp->dp_tmp_userrefs_obj, tx) == 0);
+ dp->dp_tmp_userrefs_obj = zap_create_link(mos, DMU_OT_USERREFS,
+ DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, tx);
}
static int
MODULE_PARM_DESC(zfs_write_limit_shift, "log2(fraction of memory) per txg");
module_param(zfs_txg_synctime_ms, int, 0644);
-MODULE_PARM_DESC(zfs_txg_synctime_ms, "Target milliseconds between tgx sync");
+MODULE_PARM_DESC(zfs_txg_synctime_ms, "Target milliseconds between txg sync");
+
+module_param(zfs_txg_history, int, 0644);
+MODULE_PARM_DESC(zfs_txg_history, "Historic statistics for the last N txgs");
module_param(zfs_write_limit_min, ulong, 0444);
-MODULE_PARM_DESC(zfs_write_limit_min, "Min tgx write limit");
+MODULE_PARM_DESC(zfs_write_limit_min, "Min txg write limit");
module_param(zfs_write_limit_max, ulong, 0444);
-MODULE_PARM_DESC(zfs_write_limit_max, "Max tgx write limit");
+MODULE_PARM_DESC(zfs_write_limit_max, "Max txg write limit");
module_param(zfs_write_limit_inflated, ulong, 0444);
-MODULE_PARM_DESC(zfs_write_limit_inflated, "Inflated tgx write limit");
+MODULE_PARM_DESC(zfs_write_limit_inflated, "Inflated txg write limit");
module_param(zfs_write_limit_override, ulong, 0444);
-MODULE_PARM_DESC(zfs_write_limit_override, "Override tgx write limit");
+MODULE_PARM_DESC(zfs_write_limit_override, "Override txg write limit");
#endif