/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
*/
#include <sys/dmu_zfetch.h>
#include <sys/sa.h>
#include <sys/sa_impl.h>
+#include <sys/zfeature.h>
+#include <sys/blkptr.h>
+#include <sys/range_tree.h>
+#include <sys/trace_dbuf.h>
struct dbuf_hold_impl_data {
/* Function arguments */
};
/*
- * Remove an entry from the hash table. This operation will
- * fail if there are any existing holds on the db.
+ * Remove an entry from the hash table. It must be in the EVICTING state.
*/
static void
dbuf_hash_remove(dmu_buf_impl_t *db)
idx = hv & h->hash_table_mask;
/*
- * We musn't hold db_mtx to maintin lock ordering:
+ * We mustn't hold db_mtx to maintain lock ordering:
* DBUF_HASH_MUTEX > db_mtx.
*/
ASSERT(refcount_is_zero(&db->db_holds));
boolean_t
dbuf_is_metadata(dmu_buf_impl_t *db)
{
- if (db->db_level > 0) {
+ /*
+ * Consider indirect blocks and spill blocks to be metadata.
+ */
+ if (db->db_level > 0 || db->db_blkid == DMU_SPILL_BLKID) {
return (B_TRUE);
} else {
boolean_t is_metadata;
* Large allocations which do not require contiguous pages
* should use vmem_alloc() in the Linux kernel
*/
- h->hash_table = vmem_zalloc(hsize * sizeof (void *), KM_PUSHPAGE);
+ h->hash_table = vmem_zalloc(hsize * sizeof (void *), KM_SLEEP);
#else
h->hash_table = kmem_zalloc(hsize * sizeof (void *), KM_NOSLEEP);
#endif
dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf)
{
ASSERT(MUTEX_HELD(&db->db_mtx));
- ASSERT(db->db_buf == NULL || !arc_has_callback(db->db_buf));
db->db_buf = buf;
if (buf != NULL) {
ASSERT(buf->b_data != NULL);
mutex_enter(&db->db_mtx);
if (arc_released(db->db_buf) || refcount_count(&db->db_holds) > 1) {
int blksz = db->db.db_size;
- spa_t *spa;
+ spa_t *spa = db->db_objset->os_spa;
mutex_exit(&db->db_mtx);
- DB_GET_SPA(&spa, db);
abuf = arc_loan_buf(spa, blksz);
bcopy(db->db.db_data, abuf->b_data, blksz);
} else {
dbuf_rele_and_unlock(db, NULL);
}
-static void
+static int
dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
{
dnode_t *dn;
- spa_t *spa;
- zbookmark_t zb;
+ zbookmark_phys_t zb;
uint32_t aflags = ARC_NOWAIT;
+ int err;
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
dbuf_update_data(db);
db->db_state = DB_CACHED;
mutex_exit(&db->db_mtx);
- return;
+ return (0);
}
/*
BP_IS_HOLE(db->db_blkptr)))) {
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
- dbuf_set_data(db, arc_buf_alloc(dn->dn_objset->os_spa,
- db->db.db_size, db, type));
DB_DNODE_EXIT(db);
+ dbuf_set_data(db, arc_buf_alloc(db->db_objset->os_spa,
+ db->db.db_size, db, type));
bzero(db->db.db_data, db->db.db_size);
db->db_state = DB_CACHED;
*flags |= DB_RF_CACHED;
mutex_exit(&db->db_mtx);
- return;
+ return (0);
}
- spa = dn->dn_objset->os_spa;
DB_DNODE_EXIT(db);
db->db_state = DB_READ;
dbuf_add_ref(db, NULL);
- (void) arc_read(zio, spa, db->db_blkptr,
+ err = arc_read(zio, db->db_objset->os_spa, db->db_blkptr,
dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
(*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
&aflags, &zb);
if (aflags & ARC_CACHED)
*flags |= DB_RF_CACHED;
+
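+ /*
+ * Note: SET_ERROR() just fires a debugging probe and passes err
+ * through unchanged, so returning it on success (err == 0) is
+ * harmless.
+ */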
+ return (SET_ERROR(err));
}
int
dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
{
int err = 0;
- int havepzio = (zio != NULL);
- int prefetch;
+ boolean_t havepzio = (zio != NULL);
+ boolean_t prefetch;
dnode_t *dn;
/*
if (zio == NULL)
zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
- dbuf_read_impl(db, zio, &flags);
+
+ err = dbuf_read_impl(db, zio, &flags);
/* dbuf_read_impl has dropped db_mtx for us */
- if (prefetch)
+ if (!err && prefetch)
dmu_zfetch(&dn->dn_zfetch, db->db.db_offset,
db->db.db_size, flags & DB_RF_CACHED);
rw_exit(&dn->dn_struct_rwlock);
DB_DNODE_EXIT(db);
- if (!havepzio)
+ if (!err && !havepzio)
err = zio_wait(zio);
} else {
/*
db->db_state == DB_FILL) {
ASSERT(db->db_state == DB_READ ||
(flags & DB_RF_HAVESTRUCT) == 0);
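+ /*
+ * Probe: a reader is blocking on another thread's
+ * in-flight READ or FILL of this dbuf.
+ */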
+ DTRACE_PROBE2(blocked__read, dmu_buf_impl_t *,
+ db, zio_t *, zio);
cv_wait(&db->db_changed, &db->db_mtx);
}
if (db->db_state == DB_UNCACHED)
cv_wait(&db->db_changed, &db->db_mtx);
if (db->db_state == DB_UNCACHED) {
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
- spa_t *spa;
+ spa_t *spa = db->db_objset->os_spa;
ASSERT(db->db_buf == NULL);
ASSERT(db->db.db_data == NULL);
- DB_GET_SPA(&spa, db);
dbuf_set_data(db, arc_buf_alloc(spa, db->db.db_size, db, type));
db->db_state = DB_FILL;
} else if (db->db_state == DB_NOFILL) {
} else if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
int size = db->db.db_size;
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
- spa_t *spa;
- DB_GET_SPA(&spa, db);
+ spa_t *spa = db->db_objset->os_spa;
dr->dt.dl.dr_data = arc_buf_alloc(spa, size, db, type);
bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
} else {
ASSERT(db->db_data_pending != dr);
/* free this block */
- if (!BP_IS_HOLE(bp) && !dr->dt.dl.dr_nopwrite) {
- spa_t *spa;
- DB_GET_SPA(&spa, db);
- zio_free(spa, txg, bp);
- }
+ if (!BP_IS_HOLE(bp) && !dr->dt.dl.dr_nopwrite)
+ zio_free(db->db_objset->os_spa, txg, bp);
dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
dr->dt.dl.dr_nopwrite = B_FALSE;
/*
* Evict (if it's unreferenced) or clear (if it's referenced) any level-0
* data blocks in the free range, so that any future readers will find
- * empty blocks. Also, if we happen across any level-1 dbufs in the
- * range that have not already been marked dirty, mark them dirty so
- * they stay in memory.
+ * empty blocks.
*
* This is a no-op if the dataset is in the middle of an incremental
* receive; see comment below for details.
{
dmu_buf_impl_t *db, *db_next;
uint64_t txg = tx->tx_txg;
- int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
- uint64_t first_l1 = start >> epbs;
- uint64_t last_l1 = end >> epbs;
- if (end > dn->dn_maxblkid && (end != DMU_SPILL_BLKID)) {
+ boolean_t freespill =
+ (start == DMU_SPILL_BLKID || end == DMU_SPILL_BLKID);
+ if (end > dn->dn_maxblkid && !freespill)
end = dn->dn_maxblkid;
- last_l1 = end >> epbs;
- }
dprintf_dnode(dn, "start=%llu end=%llu\n", start, end);
mutex_enter(&dn->dn_dbufs_mtx);
- if (start >= dn->dn_unlisted_l0_blkid * dn->dn_datablksz) {
+ if (start >= dn->dn_unlisted_l0_blkid * dn->dn_datablksz &&
+ !freespill) {
/* There can't be any dbufs in this range; no need to search. */
mutex_exit(&dn->dn_dbufs_mtx);
return;
db_next = list_next(&dn->dn_dbufs, db);
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
- if (db->db_level == 1 &&
- db->db_blkid >= first_l1 && db->db_blkid <= last_l1) {
- mutex_enter(&db->db_mtx);
- if (db->db_last_dirty &&
- db->db_last_dirty->dr_txg < txg) {
- dbuf_add_ref(db, FTAG);
- mutex_exit(&db->db_mtx);
- dbuf_will_dirty(db, tx);
- dbuf_rele(db, FTAG);
- } else {
- mutex_exit(&db->db_mtx);
- }
- }
-
+ /* Skip indirect blocks. */
if (db->db_level != 0)
continue;
- dprintf_dbuf(db, "found buf %s\n", "");
- if (db->db_blkid < start || db->db_blkid > end)
+ /* Skip direct blocks outside the range. */
+ if (!freespill && (db->db_blkid < start || db->db_blkid > end))
+ continue;
+ /* Skip all direct blocks; only free spill blocks. */
+ if (freespill && (db->db_blkid != DMU_SPILL_BLKID))
continue;
/* found a level 0 buffer in the range */
* We don't need any locking to protect db_blkptr:
* If it's syncing, then db_last_dirty will be set
* so we'll ignore db_blkptr.
+ *
+ * This logic ensures that only block births for
+ * filled blocks are considered.
*/
ASSERT(MUTEX_HELD(&db->db_mtx));
- if (db->db_last_dirty)
+ if (db->db_last_dirty && (db->db_blkptr == NULL ||
+ !BP_IS_HOLE(db->db_blkptr))) {
birth_txg = db->db_last_dirty->dr_txg;
- else if (db->db_blkptr)
+ } else if (db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)) {
birth_txg = db->db_blkptr->blk_birth;
+ }
/*
- * If we don't exist or are in a snapshot, we can't be freed.
+ * If this block doesn't exist or is in a snapshot, it can't be freed.
* Don't pass the bp to dsl_dataset_block_freeable() since we
* are holding the db_mtx lock and might deadlock if we are
* prefetching a dedup-ed block.
*/
- if (birth_txg)
+ if (birth_txg != 0)
return (ds == NULL ||
dsl_dataset_block_freeable(ds, NULL, birth_txg));
else
- return (FALSE);
+ return (B_FALSE);
}
void
ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
/*
- * This call to dbuf_will_dirty() with the dn_struct_rwlock held
+ * This call to dmu_buf_will_dirty() with the dn_struct_rwlock held
* is OK, because there can be no other references to the db
* when we are changing its size, so no concurrent DB_FILL can
* be happening.
* XXX we should be doing a dbuf_read, checking the return
* value and returning that up to our callers
*/
- dbuf_will_dirty(db, tx);
+ dmu_buf_will_dirty(&db->db, tx);
/* create the data buffer for the new block */
buf = arc_buf_alloc(dn->dn_objset->os_spa, size, db, type);
void
dbuf_release_bp(dmu_buf_impl_t *db)
{
- objset_t *os;
- DB_GET_OBJSET(&os, db);
+ ASSERTV(objset_t *os = db->db_objset);
ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
ASSERT(arc_released(os->os_phys_buf) ||
list_link_active(&os->os_dsl_dataset->ds_synced_link));
dn->dn_dirtyctx =
(dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN);
ASSERT(dn->dn_dirtyctx_firstset == NULL);
- dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_PUSHPAGE);
+ dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP);
}
mutex_exit(&dn->dn_mtx);
* to make a copy of it so that the changes we make in this
* transaction group won't leak out when we sync the older txg.
*/
- dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_PUSHPAGE);
+ dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_SLEEP);
list_link_init(&dr->dr_dirty_node);
if (db->db_level == 0) {
void *data_old = db->db_buf;
if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID &&
db->db_blkid != DMU_SPILL_BLKID) {
mutex_enter(&dn->dn_mtx);
- dnode_clear_range(dn, db->db_blkid, 1, tx);
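+ /*
+ * The per-txg free ranges are now tracked in a range tree;
+ * range_tree_clear(rt, off, size) removes [off, off + size)
+ * from the tree if it is present.
+ */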
+ if (dn->dn_free_ranges[txgoff] != NULL) {
+ range_tree_clear(dn->dn_free_ranges[txgoff],
+ db->db_blkid, 1);
+ }
mutex_exit(&dn->dn_mtx);
db->db_freed_in_flight = FALSE;
}
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
- /*
- * Note: This code will probably work even if there are concurrent
- * holders, but it is untested in that scenerio, as the ZPL and
- * ztest have additional locking (the range locks) that prevents
- * that type of concurrent access.
- */
- ASSERT3U(refcount_count(&db->db_holds), ==, db->db_dirtycnt);
-
dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
ASSERT(db->db.db_size != 0);
return (B_FALSE);
}
-#pragma weak dmu_buf_will_dirty = dbuf_will_dirty
void
-dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
+dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
{
+ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH;
ASSERT(tx->tx_txg != 0);
mutex_exit(&db->db_mtx);
}
+void
+dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data,
+ bp_embedded_type_t etype, enum zio_compress comp,
+ int uncompressed_size, int compressed_size, int byteorder,
+ dmu_tx_t *tx)
+{
+ dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf;
+ struct dirty_leaf *dl;
+ dmu_object_type_t type;
+
+ DB_DNODE_ENTER(db);
+ type = DB_DNODE(db)->dn_type;
+ DB_DNODE_EXIT(db);
+
+ ASSERT0(db->db_level);
+ ASSERT(db->db_blkid != DMU_BONUS_BLKID);
+
+ dmu_buf_will_not_fill(dbuf, tx);
+
+ ASSERT3U(db->db_last_dirty->dr_txg, ==, tx->tx_txg);
+ dl = &db->db_last_dirty->dt.dl;
+ encode_embedded_bp_compressed(&dl->dr_overridden_by,
+ data, comp, uncompressed_size, compressed_size);
+ BPE_SET_ETYPE(&dl->dr_overridden_by, etype);
+ BP_SET_TYPE(&dl->dr_overridden_by, type);
+ BP_SET_LEVEL(&dl->dr_overridden_by, 0);
+ BP_SET_BYTEORDER(&dl->dr_overridden_by, byteorder);
+
+ dl->dr_override_state = DR_OVERRIDDEN;
+ dl->dr_overridden_by.blk_birth = db->db_last_dirty->dr_txg;
+}
+
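/*
 * Usage sketch (illustrative only, not part of this change): writing an
 * embedded bp from open context, in the style of dmu_write_embedded() in
 * dmu.c. The helper name here is hypothetical, and dmu_buf_hold() is used
 * for simplicity even though it reads the block that is about to be
 * overwritten.
 */
static void
example_write_embedded(objset_t *os, uint64_t object, uint64_t offset,
    void *data, bp_embedded_type_t etype, enum zio_compress comp,
    int usize, int csize, dmu_tx_t *tx)
{
	dmu_buf_t *db;

	/* Hold the level-0 buffer covering 'offset'. */
	VERIFY0(dmu_buf_hold(os, object, offset, FTAG, &db, 0));

	/*
	 * Record the already-compressed payload directly in the dirty
	 * record's override bp; it is written as an embedded bp at sync.
	 */
	dmu_buf_write_embedded(db, data, etype, comp, usize, csize,
	    ZFS_HOST_BYTEORDER, tx);

	dmu_buf_rele(db, FTAG);
}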
/*
* Directly assign a provided arc buf to a given dbuf if it's not referenced
* by anybody except our caller. Otherwise copy arcbuf's contents to dbuf.
db->db_state = DB_FILL;
mutex_exit(&db->db_mtx);
(void) dbuf_dirty(db, tx);
- dbuf_fill_done(db, tx);
+ dmu_buf_fill_done(&db->db, tx);
}
/*
* when we are not holding the dn_dbufs_mtx, we can't clear the
* entry in the dn_dbufs list. We have to wait until dbuf_destroy()
* in this case. For callers from the DMU we will usually see:
- * dbuf_clear()->arc_buf_evict()->dbuf_do_evict()->dbuf_destroy()
+ * dbuf_clear()->arc_clear_callback()->dbuf_do_evict()->dbuf_destroy()
* For the arc callback, we will usually see:
* dbuf_do_evict()->dbuf_clear();dbuf_destroy()
* Sometimes, though, we will get a mix of these two:
- * DMU: dbuf_clear()->arc_buf_evict()
+ * DMU: dbuf_clear()->arc_clear_callback()
* ARC: dbuf_do_evict()->dbuf_destroy()
+ *
+ * This routine will dissociate the dbuf from the arc by calling
+ * arc_clear_callback(), but will not evict the data from the ARC.
*/
void
dbuf_clear(dmu_buf_impl_t *db)
dnode_t *dn;
dmu_buf_impl_t *parent = db->db_parent;
dmu_buf_impl_t *dndb;
- int dbuf_gone = FALSE;
+ boolean_t dbuf_gone = B_FALSE;
ASSERT(MUTEX_HELD(&db->db_mtx));
ASSERT(refcount_is_zero(&db->db_holds));
dndb = dn->dn_dbuf;
if (db->db_blkid != DMU_BONUS_BLKID && MUTEX_HELD(&dn->dn_dbufs_mtx)) {
list_remove(&dn->dn_dbufs, db);
- (void) atomic_dec_32_nv(&dn->dn_dbufs_count);
+ atomic_dec_32(&dn->dn_dbufs_count);
membar_producer();
DB_DNODE_EXIT(db);
/*
}
if (db->db_buf)
- dbuf_gone = arc_buf_evict(db->db_buf);
+ dbuf_gone = arc_clear_callback(db->db_buf);
if (!dbuf_gone)
mutex_exit(&db->db_mtx);
ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
ASSERT(dn->dn_type != DMU_OT_NONE);
- db = kmem_cache_alloc(dbuf_cache, KM_PUSHPAGE);
+ db = kmem_cache_alloc(dbuf_cache, KM_SLEEP);
db->db_objset = os;
db->db.db_object = dn->dn_object;
ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT ||
refcount_count(&dn->dn_holds) > 0);
(void) refcount_add(&dn->dn_holds, db);
- (void) atomic_inc_32_nv(&dn->dn_dbufs_count);
+ atomic_inc_32(&dn->dn_dbufs_count);
dprintf_dbuf(db, "db=%p\n", db);
static int
dbuf_do_evict(void *private)
{
- arc_buf_t *buf = private;
- dmu_buf_impl_t *db = buf->b_private;
+ dmu_buf_impl_t *db = private;
if (!MUTEX_HELD(&db->db_mtx))
mutex_enter(&db->db_mtx);
dn = DB_DNODE(db);
mutex_enter(&dn->dn_dbufs_mtx);
list_remove(&dn->dn_dbufs, db);
- (void) atomic_dec_32_nv(&dn->dn_dbufs_count);
+ atomic_dec_32(&dn->dn_dbufs_count);
mutex_exit(&dn->dn_dbufs_mtx);
DB_DNODE_EXIT(db);
/*
}
if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp, NULL) == 0) {
- if (bp && !BP_IS_HOLE(bp)) {
+ if (bp && !BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) {
dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
- zbookmark_t zb;
+ zbookmark_phys_t zb;
SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
dn->dn_object, 0, blkid);
int error;
dh = kmem_zalloc(sizeof (struct dbuf_hold_impl_data) *
- DBUF_HOLD_IMPL_MAX_DEPTH, KM_PUSHPAGE);
+ DBUF_HOLD_IMPL_MAX_DEPTH, KM_SLEEP);
__dbuf_hold_impl_init(dh, dn, level, blkid, fail_sparse, tag, dbp, 0);
error = __dbuf_hold_impl(dh);
* Without that, the dbuf_rele() could lead to a dnode_rele() followed by the
* dnode's parent dbuf evicting its dnode handles.
*/
-#pragma weak dmu_buf_rele = dbuf_rele
void
dbuf_rele(dmu_buf_impl_t *db, void *tag)
{
dbuf_rele_and_unlock(db, tag);
}
+void
+dmu_buf_rele(dmu_buf_t *db, void *tag)
+{
+ dbuf_rele((dmu_buf_impl_t *)db, tag);
+}
+
/*
* dbuf_rele() for an already-locked dbuf. This is necessary to allow
* db_dirtycnt and db_holds to be updated atomically.
if (holds == 0) {
if (db->db_blkid == DMU_BONUS_BLKID) {
- mutex_exit(&db->db_mtx);
+ dnode_t *dn;
/*
- * If the dnode moves here, we cannot cross this barrier
- * until the move completes.
+ * If the dnode moves here, we cannot cross this
+ * barrier until the move completes.
*/
DB_DNODE_ENTER(db);
- (void) atomic_dec_32_nv(&DB_DNODE(db)->dn_dbufs_count);
+
+ dn = DB_DNODE(db);
+ atomic_dec_32(&dn->dn_dbufs_count);
+
+ /*
+ * Decrementing the dbuf count means that the bonus
+ * buffer's dnode hold is no longer discounted in
+ * dnode_move(). The dnode cannot move until after
+ * the dnode_rele_and_unlock() below.
+ */
DB_DNODE_EXIT(db);
+
+ /*
+ * Do not reference db after its lock is dropped.
+ * Another thread may evict it.
+ */
+ mutex_exit(&db->db_mtx);
+
/*
- * The bonus buffer's dnode hold is no longer discounted
- * in dnode_move(). The dnode cannot move until after
- * the dnode_rele().
+ * If the dnode has been freed, evict the bonus
+ * buffer immediately. The data in the bonus
+ * buffer is no longer relevant and this prevents
+ * a stale bonus buffer from being associated
+ * with this dnode_t should the dnode_t be reused
+ * prior to being destroyed.
*/
- dnode_rele(DB_DNODE(db), db);
+ mutex_enter(&dn->dn_mtx);
+ if (dn->dn_type == DMU_OT_NONE ||
+ dn->dn_free_txg != 0) {
+ /*
+ * Drop dn_mtx. It is a leaf lock and
+ * cannot be held when dnode_evict_bonus()
+ * acquires other locks in order to
+ * perform the eviction.
+ *
+ * Freed dnodes cannot be reused until the
+ * last hold is released. Since this bonus
+ * buffer has a hold, the dnode will remain
+ * in the free state, even without dn_mtx
+ * held, until the dnode_rele_and_unlock()
+ * below.
+ */
+ mutex_exit(&dn->dn_mtx);
+ dnode_evict_bonus(dn);
+ mutex_enter(&dn->dn_mtx);
+ }
+ dnode_rele_and_unlock(dn, db);
} else if (db->db_buf == NULL) {
/*
* This is a special case: we never associated this
* block on-disk. If so, then we simply evict
* ourselves.
*/
- if (!DBUF_IS_CACHEABLE(db) ||
- arc_buf_eviction_needed(db->db_buf))
+ if (!DBUF_IS_CACHEABLE(db)) {
+ if (db->db_blkptr != NULL &&
+ !BP_IS_HOLE(db->db_blkptr) &&
+ !BP_IS_EMBEDDED(db->db_blkptr)) {
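+ /*
+ * Copy the bp to the stack before dbuf_clear():
+ * clearing may evict the dbuf, after which
+ * db->db_blkptr must not be dereferenced.
+ */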
+ spa_t *spa =
+ dmu_objset_spa(db->db_objset);
+ blkptr_t bp = *db->db_blkptr;
+ dbuf_clear(db);
+ arc_freed(spa, &bp);
+ } else {
+ dbuf_clear(db);
+ }
+ } else if (arc_buf_eviction_needed(db->db_buf)) {
dbuf_clear(db);
- else
+ } else {
mutex_exit(&db->db_mtx);
+ }
}
} else {
mutex_exit(&db->db_mtx);
uint64_t fill = 0;
int i;
- ASSERT(db->db_blkptr == bp);
+ ASSERT3P(db->db_blkptr, ==, bp);
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
dnode_diduse_space(dn, delta - zio->io_prev_space_delta);
zio->io_prev_space_delta = delta;
- if (BP_IS_HOLE(bp)) {
- ASSERT(bp->blk_fill == 0);
- DB_DNODE_EXIT(db);
- return;
+ if (bp->blk_birth != 0) {
+ ASSERT((db->db_blkid != DMU_SPILL_BLKID &&
+ BP_GET_TYPE(bp) == dn->dn_type) ||
+ (db->db_blkid == DMU_SPILL_BLKID &&
+ BP_GET_TYPE(bp) == dn->dn_bonustype) ||
+ BP_IS_EMBEDDED(bp));
+ ASSERT(BP_GET_LEVEL(bp) == db->db_level);
}
- ASSERT((db->db_blkid != DMU_SPILL_BLKID &&
- BP_GET_TYPE(bp) == dn->dn_type) ||
- (db->db_blkid == DMU_SPILL_BLKID &&
- BP_GET_TYPE(bp) == dn->dn_bonustype));
- ASSERT(BP_GET_LEVEL(bp) == db->db_level);
-
mutex_enter(&db->db_mtx);
#ifdef ZFS_DEBUG
fill++;
}
} else {
- fill = 1;
+ if (BP_IS_HOLE(bp)) {
+ fill = 0;
+ } else {
+ fill = 1;
+ }
}
} else {
blkptr_t *ibp = db->db.db_data;
for (i = db->db.db_size >> SPA_BLKPTRSHIFT; i > 0; i--, ibp++) {
if (BP_IS_HOLE(ibp))
continue;
- fill += ibp->blk_fill;
+ fill += BP_GET_FILL(ibp);
}
}
DB_DNODE_EXIT(db);
- bp->blk_fill = fill;
+ if (!BP_IS_EMBEDDED(bp))
+ bp->blk_fill = fill;
mutex_exit(&db->db_mtx);
}
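/*
 * For reference (an assumption about the embedded-bp feature, not shown in
 * this diff): embedded bps carry their payload in the bp itself and have
 * no on-disk fill count, so readers go through BP_GET_FILL(), roughly:
 *
 *	fill = BP_IS_EMBEDDED(bp) ? 1 : bp->blk_fill;
 *
 * which is why blk_fill is only stored for !BP_IS_EMBEDDED(bp) above.
 */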
dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
{
dmu_buf_impl_t *db = vdb;
- blkptr_t *bp = zio->io_bp;
blkptr_t *bp_orig = &zio->io_bp_orig;
- uint64_t txg = zio->io_txg;
+ blkptr_t *bp = db->db_blkptr;
+ objset_t *os = db->db_objset;
+ dmu_tx_t *tx = os->os_synctx;
dbuf_dirty_record_t **drp, *dr;
ASSERT0(zio->io_error);
if (zio->io_flags & (ZIO_FLAG_IO_REWRITE | ZIO_FLAG_NOPWRITE)) {
ASSERT(BP_EQUAL(bp, bp_orig));
} else {
- objset_t *os;
- dsl_dataset_t *ds;
- dmu_tx_t *tx;
-
- DB_GET_OBJSET(&os, db);
- ds = os->os_dsl_dataset;
- tx = os->os_synctx;
-
+ dsl_dataset_t *ds = os->os_dsl_dataset;
(void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE);
dsl_dataset_block_born(ds, bp, tx);
}
while ((dr = *drp) != db->db_data_pending)
drp = &dr->dr_next;
ASSERT(!list_link_active(&dr->dr_dirty_node));
- ASSERT(dr->dr_txg == txg);
ASSERT(dr->dr_dbuf == db);
ASSERT(dr->dr_next == NULL);
*drp = dr->dr_next;
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
- ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift);
+ ASSERT3U(db->db.db_size, ==, 1 << dn->dn_phys->dn_indblkshift);
if (!BP_IS_HOLE(db->db_blkptr)) {
ASSERTV(int epbs = dn->dn_phys->dn_indblkshift -
SPA_BLKPTRSHIFT);
+ ASSERT3U(db->db_blkid, <=,
+ dn->dn_phys->dn_maxblkid >> (db->db_level * epbs));
ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==,
db->db.db_size);
- ASSERT3U(dn->dn_phys->dn_maxblkid
- >> (db->db_level * epbs), >=, db->db_blkid);
- arc_set_callback(db->db_buf, dbuf_do_evict, db);
+ if (!arc_released(db->db_buf))
+ arc_set_callback(db->db_buf, dbuf_do_evict, db);
}
DB_DNODE_EXIT(db);
mutex_destroy(&dr->dt.di.dr_mtx);
ASSERT(db->db_dirtycnt > 0);
db->db_dirtycnt -= 1;
db->db_data_pending = NULL;
-
- dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg);
+ dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg);
}
static void
objset_t *os;
dmu_buf_impl_t *parent = db->db_parent;
uint64_t txg = tx->tx_txg;
- zbookmark_t zb;
+ zbookmark_phys_t zb;
zio_prop_t zp;
zio_t *zio;
int wp_flag = 0;
dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
DB_DNODE_EXIT(db);
- if (db->db_level == 0 && dr->dt.dl.dr_override_state == DR_OVERRIDDEN) {
- ASSERT(db->db_state != DB_NOFILL);
+ if (db->db_level == 0 &&
+ dr->dt.dl.dr_override_state == DR_OVERRIDDEN) {
+ /*
+ * The BP for this block has been provided by open context
+ * (by dmu_sync() or dmu_buf_write_embedded()).
+ */
+ void *contents = (data != NULL) ? data->b_data : NULL;
+
dr->dr_zio = zio_write(zio, os->os_spa, txg,
- db->db_blkptr, data->b_data, arc_buf_size(data), &zp,
+ db->db_blkptr, contents, db->db.db_size, &zp,
dbuf_write_override_ready, NULL, dbuf_write_override_done,
dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
mutex_enter(&db->db_mtx);
EXPORT_SYMBOL(dmu_buf_update_user);
EXPORT_SYMBOL(dmu_buf_get_user);
EXPORT_SYMBOL(dmu_buf_freeable);
+EXPORT_SYMBOL(dmu_buf_get_blkptr);
#endif