/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/zio_compress.h>
#include <sys/dsl_scan.h>
+/*
+ * Enable/disable prefetching of dedup-ed blocks which are going to be freed.
+ */
+int zfs_dedup_prefetch = 1;
+
static const ddt_ops_t *ddt_ops[DDT_TYPES] = {
&ddt_zap_ops,
};
spa_t *spa = ddt->ddt_spa;
objset_t *os = ddt->ddt_os;
uint64_t *objectp = &ddt->ddt_object[type][class];
+ uint64_t count;
char name[DDT_NAMELEN];
ddt_object_name(ddt, type, class, name);
ASSERT(*objectp != 0);
- ASSERT(ddt_object_count(ddt, type, class) == 0);
ASSERT(ddt_histogram_empty(&ddt->ddt_histogram[type][class]));
+ VERIFY(ddt_object_count(ddt, type, class, &count) == 0 && count == 0);
VERIFY(zap_remove(os, DMU_POOL_DIRECTORY_OBJECT, name, tx) == 0);
VERIFY(zap_remove(os, spa->spa_ddt_stat_object, name, tx) == 0);
VERIFY(ddt_ops[type]->ddt_op_destroy(os, *objectp, tx) == 0);
{
ddt_object_t *ddo = &ddt->ddt_object_stats[type][class];
dmu_object_info_t doi;
+ uint64_t count;
char name[DDT_NAMELEN];
int error;
/*
* Seed the cached statistics.
*/
- VERIFY(ddt_object_info(ddt, type, class, &doi) == 0);
+ error = ddt_object_info(ddt, type, class, &doi);
+ if (error)
+ return (error);
- ddo->ddo_count = ddt_object_count(ddt, type, class);
+ error = ddt_object_count(ddt, type, class, &count);
+ if (error)
+ return (error);
+
+ ddo->ddo_count = count;
ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9;
ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size;
{
ddt_object_t *ddo = &ddt->ddt_object_stats[type][class];
dmu_object_info_t doi;
+ uint64_t count;
char name[DDT_NAMELEN];
ddt_object_name(ddt, type, class, name);
* Cache DDT statistics; this is the only time they'll change.
*/
VERIFY(ddt_object_info(ddt, type, class, &doi) == 0);
+ VERIFY(ddt_object_count(ddt, type, class, &count) == 0);
- ddo->ddo_count = ddt_object_count(ddt, type, class);
+ ddo->ddo_count = count;
ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9;
ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size;
}
ddt->ddt_object[type][class], dde, walk));
}
-uint64_t
-ddt_object_count(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
+int
+ddt_object_count(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
+ uint64_t *count)
{
ASSERT(ddt_object_exists(ddt, type, class));
return (ddt_ops[type]->ddt_op_count(ddt->ddt_os,
- ddt->ddt_object[type][class]));
+ ddt->ddt_object[type][class], count));
}
int
void
ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp, uint64_t txg)
{
+ int d;
ASSERT(txg != 0);
- for (int d = 0; d < SPA_DVAS_PER_BP; d++)
+ for (d = 0; d < SPA_DVAS_PER_BP; d++)
bp->blk_dva[d] = ddp->ddp_dva[d];
BP_SET_BIRTH(bp, txg, ddp->ddp_phys_birth);
}
void
ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp)
{
+ int d;
ASSERT(ddp->ddp_phys_birth == 0);
- for (int d = 0; d < SPA_DVAS_PER_BP; d++)
+ for (d = 0; d < SPA_DVAS_PER_BP; d++)
ddp->ddp_dva[d] = bp->blk_dva[d];
ddp->ddp_phys_birth = BP_PHYSICAL_BIRTH(bp);
}
void
ddt_phys_decref(ddt_phys_t *ddp)
{
- ASSERT((int64_t)ddp->ddp_refcnt > 0);
- ddp->ddp_refcnt--;
+ if (ddp) {
+ ASSERT(ddp->ddp_refcnt > 0);
+ ddp->ddp_refcnt--;
+ }
}
void
ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp)
{
ddt_phys_t *ddp = (ddt_phys_t *)dde->dde_phys;
+ int p;
- for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
+ for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
if (DVA_EQUAL(BP_IDENTITY(bp), &ddp->ddp_dva[0]) &&
BP_PHYSICAL_BIRTH(bp) == ddp->ddp_phys_birth)
return (ddp);
ddt_phys_total_refcnt(const ddt_entry_t *dde)
{
uint64_t refcnt = 0;
+ int p;
- for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++)
+ for (p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++)
refcnt += dde->dde_phys[p].ddp_refcnt;
return (refcnt);
ddt_key_t *ddk = &dde->dde_key;
uint64_t lsize = DDK_GET_LSIZE(ddk);
uint64_t psize = DDK_GET_PSIZE(ddk);
+ int p, d;
bzero(dds, sizeof (*dds));
- for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
+ for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
uint64_t dsize = 0;
uint64_t refcnt = ddp->ddp_refcnt;
if (ddp->ddp_phys_birth == 0)
continue;
- for (int d = 0; d < SPA_DVAS_PER_BP; d++)
+ for (d = 0; d < SPA_DVAS_PER_BP; d++)
dsize += dva_get_dsize_sync(spa, &ddp->ddp_dva[d]);
dds->dds_blocks += 1;
void
ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src)
{
- for (int h = 0; h < 64; h++)
+ int h;
+
+ for (h = 0; h < 64; h++)
ddt_stat_add(&dst->ddh_stat[h], &src->ddh_stat[h], 0);
}
void
ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh)
{
+ int h;
+
bzero(dds, sizeof (*dds));
- for (int h = 0; h < 64; h++)
+ for (h = 0; h < 64; h++)
ddt_stat_add(dds, &ddh->ddh_stat[h], 0);
}
void
ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo_total)
{
+ enum zio_checksum c;
+ enum ddt_type type;
+ enum ddt_class class;
+
/* Sum the statistics we cached in ddt_object_sync(). */
- for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
+ for (c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
ddt_t *ddt = spa->spa_ddt[c];
- for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
- for (enum ddt_class class = 0; class < DDT_CLASSES;
+ for (type = 0; type < DDT_TYPES; type++) {
+ for (class = 0; class < DDT_CLASSES;
class++) {
ddt_object_t *ddo =
&ddt->ddt_object_stats[type][class];
if (ddo_total->ddo_count != 0) {
ddo_total->ddo_dspace /= ddo_total->ddo_count;
ddo_total->ddo_mspace /= ddo_total->ddo_count;
- } else {
- ASSERT(ddo_total->ddo_dspace == 0);
- ASSERT(ddo_total->ddo_mspace == 0);
}
}
void
ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh)
{
- for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
+ enum zio_checksum c;
+ enum ddt_type type;
+ enum ddt_class class;
+
+ for (c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
ddt_t *ddt = spa->spa_ddt[c];
- for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
- for (enum ddt_class class = 0; class < DDT_CLASSES;
+ for (type = 0; type < DDT_TYPES; type++) {
+ for (class = 0; class < DDT_CLASSES;
class++) {
ddt_histogram_add(ddh,
&ddt->ddt_histogram_cache[type][class]);
{
ddt_histogram_t *ddh_total;
- ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP);
+ /* XXX: Move to a slab */
+ ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_PUSHPAGE);
ddt_get_dedup_histogram(spa, ddh_total);
ddt_histogram_stat(dds_total, ddh_total);
kmem_free(ddh_total, sizeof (ddt_histogram_t));
uint64_t ditto = spa->spa_dedup_ditto;
int total_copies = 0;
int desired_copies = 0;
+ int p;
- for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) {
+ for (p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) {
ddt_phys_t *ddp = &dde->dde_phys[p];
zio_t *zio = dde->dde_lead_zio[p];
uint64_t refcnt = ddp->ddp_refcnt; /* committed refs */
ddt_phys_t *ddp = &dde->dde_phys[DDT_PHYS_DITTO];
dva_t *dva = ddp->ddp_dva;
int copies = 0 - DVA_GET_GANG(dva);
+ int d;
- for (int d = 0; d < SPA_DVAS_PER_BP; d++, dva++)
+ for (d = 0; d < SPA_DVAS_PER_BP; d++, dva++)
if (DVA_IS_VALID(dva))
copies++;
{
ddt_entry_t *dde;
- dde = kmem_zalloc(sizeof (ddt_entry_t), KM_SLEEP);
+ /* XXX: Move to a slab */
+ dde = kmem_zalloc(sizeof (ddt_entry_t), KM_PUSHPAGE);
cv_init(&dde->dde_cv, NULL, CV_DEFAULT, NULL);
dde->dde_key = *ddk;
static void
ddt_free(ddt_entry_t *dde)
{
+ int p;
+
ASSERT(!dde->dde_loading);
- for (int p = 0; p < DDT_PHYS_TYPES; p++)
+ for (p = 0; p < DDT_PHYS_TYPES; p++)
ASSERT(dde->dde_lead_zio[p] == NULL);
if (dde->dde_repair_data != NULL)
{
ddt_t *ddt;
ddt_entry_t dde;
+ enum ddt_type type;
+ enum ddt_class class;
- if (!BP_GET_DEDUP(bp))
+ if (!zfs_dedup_prefetch || bp == NULL || !BP_GET_DEDUP(bp))
return;
/*
- * We remove the DDT once it's empty and only prefetch dedup blocks
- * when there are entries in the DDT. Thus no locking is required
- * as the DDT can't disappear on us.
+ * We only remove the DDT once all tables are empty and only
+ * prefetch dedup blocks when there are entries in the DDT.
+ * Thus no locking is required as the DDT can't disappear on us.
*/
ddt = ddt_select(spa, bp);
ddt_key_fill(&dde.dde_key, bp);
- for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
- for (enum ddt_class class = 0; class < DDT_CLASSES; class++) {
+ for (type = 0; type < DDT_TYPES; type++) {
+ for (class = 0; class < DDT_CLASSES; class++) {
ddt_object_prefetch(ddt, type, class, &dde);
}
}
const ddt_entry_t *dde2 = x2;
const uint64_t *u1 = (const uint64_t *)&dde1->dde_key;
const uint64_t *u2 = (const uint64_t *)&dde2->dde_key;
+ int i;
- for (int i = 0; i < DDT_KEY_WORDS; i++) {
+ for (i = 0; i < DDT_KEY_WORDS; i++) {
if (u1[i] < u2[i])
return (-1);
if (u1[i] > u2[i])
{
ddt_t *ddt;
- ddt = kmem_zalloc(sizeof (*ddt), KM_SLEEP);
+ /* XXX: Move to a slab */
+ ddt = kmem_zalloc(sizeof (*ddt), KM_PUSHPAGE | KM_NODEBUG);
mutex_init(&ddt->ddt_lock, NULL, MUTEX_DEFAULT, NULL);
avl_create(&ddt->ddt_tree, ddt_entry_compare,
void
ddt_create(spa_t *spa)
{
+ enum zio_checksum c;
+
spa->spa_dedup_checksum = ZIO_DEDUPCHECKSUM;
- for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++)
+ for (c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++)
spa->spa_ddt[c] = ddt_table_alloc(spa, c);
}
int
ddt_load(spa_t *spa)
{
+ enum zio_checksum c;
+ enum ddt_type type;
+ enum ddt_class class;
int error;
ddt_create(spa);
if (error)
return (error == ENOENT ? 0 : error);
- for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
+ for (c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
ddt_t *ddt = spa->spa_ddt[c];
- for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
- for (enum ddt_class class = 0; class < DDT_CLASSES;
+ for (type = 0; type < DDT_TYPES; type++) {
+ for (class = 0; class < DDT_CLASSES;
class++) {
error = ddt_object_load(ddt, type, class);
if (error != 0 && error != ENOENT)
void
ddt_unload(spa_t *spa)
{
- for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
+ enum zio_checksum c;
+
+ for (c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
if (spa->spa_ddt[c]) {
ddt_table_free(spa->spa_ddt[c]);
spa->spa_ddt[c] = NULL;
ddt_class_contains(spa_t *spa, enum ddt_class max_class, const blkptr_t *bp)
{
ddt_t *ddt;
- ddt_entry_t dde;
+ ddt_entry_t *dde;
+ enum ddt_type type;
+ enum ddt_class class;
if (!BP_GET_DEDUP(bp))
return (B_FALSE);
return (B_TRUE);
ddt = spa->spa_ddt[BP_GET_CHECKSUM(bp)];
+ dde = kmem_alloc(sizeof(ddt_entry_t), KM_PUSHPAGE);
- ddt_key_fill(&dde.dde_key, bp);
+ ddt_key_fill(&(dde->dde_key), bp);
- for (enum ddt_type type = 0; type < DDT_TYPES; type++)
- for (enum ddt_class class = 0; class <= max_class; class++)
- if (ddt_object_lookup(ddt, type, class, &dde) == 0)
+ for (type = 0; type < DDT_TYPES; type++) {
+ for (class = 0; class <= max_class; class++) {
+ if (ddt_object_lookup(ddt, type, class, dde) == 0) {
+ kmem_free(dde, sizeof(ddt_entry_t));
return (B_TRUE);
+ }
+ }
+ }
+ kmem_free(dde, sizeof(ddt_entry_t));
return (B_FALSE);
}
{
ddt_key_t ddk;
ddt_entry_t *dde;
+ enum ddt_type type;
+ enum ddt_class class;
ddt_key_fill(&ddk, bp);
dde = ddt_alloc(&ddk);
- for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
- for (enum ddt_class class = 0; class < DDT_CLASSES; class++) {
+ for (type = 0; type < DDT_TYPES; type++) {
+ for (class = 0; class < DDT_CLASSES; class++) {
/*
* We can only do repair if there are multiple copies
* of the block. For anything in the UNIQUE class,
ddt_key_t *rddk = &rdde->dde_key;
zio_t *zio;
blkptr_t blk;
+ int p;
zio = zio_null(rio, rio->io_spa, NULL,
ddt_repair_entry_done, rdde, rio->io_flags);
- for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++, rddp++) {
+ for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++, rddp++) {
if (ddp->ddp_phys_birth == 0 ||
ddp->ddp_phys_birth != rddp->ddp_phys_birth ||
bcmp(ddp->ddp_dva, rddp->ddp_dva, sizeof (ddp->ddp_dva)))
enum ddt_class oclass = dde->dde_class;
enum ddt_class nclass;
uint64_t total_refcnt = 0;
+ int p;
ASSERT(dde->dde_loaded);
ASSERT(!dde->dde_loading);
- for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
+ for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
ASSERT(dde->dde_lead_zio[p] == NULL);
- ASSERT((int64_t)ddp->ddp_refcnt >= 0);
if (ddp->ddp_phys_birth == 0) {
ASSERT(ddp->ddp_refcnt == 0);
continue;
spa_t *spa = ddt->ddt_spa;
ddt_entry_t *dde;
void *cookie = NULL;
+ enum ddt_type type;
+ enum ddt_class class;
if (avl_numnodes(&ddt->ddt_tree) == 0)
return;
ASSERT(spa->spa_uberblock.ub_version >= SPA_VERSION_DEDUP);
if (spa->spa_ddt_stat_object == 0) {
- spa->spa_ddt_stat_object = zap_create(ddt->ddt_os,
- DMU_OT_DDT_STATS, DMU_OT_NONE, 0, tx);
- VERIFY(zap_add(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_DDT_STATS, sizeof (uint64_t), 1,
- &spa->spa_ddt_stat_object, tx) == 0);
+ spa->spa_ddt_stat_object = zap_create_link(ddt->ddt_os,
+ DMU_OT_DDT_STATS, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_DDT_STATS, tx);
}
while ((dde = avl_destroy_nodes(&ddt->ddt_tree, &cookie)) != NULL) {
ddt_free(dde);
}
- for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
- for (enum ddt_class class = 0; class < DDT_CLASSES; class++) {
- if (!ddt_object_exists(ddt, type, class))
- continue;
- ddt_object_sync(ddt, type, class, tx);
- if (ddt_object_count(ddt, type, class) == 0)
+ for (type = 0; type < DDT_TYPES; type++) {
+ uint64_t add, count = 0;
+ for (class = 0; class < DDT_CLASSES; class++) {
+ if (ddt_object_exists(ddt, type, class)) {
+ ddt_object_sync(ddt, type, class, tx);
+ VERIFY(ddt_object_count(ddt, type, class,
+ &add) == 0);
+ count += add;
+ }
+ }
+ for (class = 0; class < DDT_CLASSES; class++) {
+ if (count == 0 && ddt_object_exists(ddt, type, class))
ddt_object_destroy(ddt, type, class, tx);
}
}
dmu_tx_t *tx;
zio_t *rio = zio_root(spa, NULL, NULL,
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
+ enum zio_checksum c;
ASSERT(spa_syncing_txg(spa) == txg);
tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
- for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
+ for (c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
ddt_t *ddt = spa->spa_ddt[c];
if (ddt == NULL)
continue;
return (ENOENT);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+module_param(zfs_dedup_prefetch, int, 0644);
+MODULE_PARM_DESC(zfs_dedup_prefetch,"Enable prefetching dedup-ed blks");
+#endif