Undo c89 workarounds to match with upstream
index ebb95ddb8fe7ac20af7700c8476e8d19d164063e..a897db5ddf91fc3088a1e95714c658c9da9cc14a 100644 (file)
@@ -21,7 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
- * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  */
@@ -78,12 +78,10 @@ uint_t zfs_dbuf_evict_key;
 static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
 static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
 
-#ifndef __lint
 extern inline void dmu_buf_init_user(dmu_buf_user_t *dbu,
     dmu_buf_evict_func_t *evict_func_sync,
     dmu_buf_evict_func_t *evict_func_async,
     dmu_buf_t **clear_on_evict_dbufp);
-#endif /* ! __lint */
 
 /*
  * Global data structures and functions for the dbuf cache.
@@ -106,7 +104,7 @@ static boolean_t dbuf_evict_thread_exit;
  * Dbufs that are aged out of the cache will be immediately destroyed and
  * become eligible for arc eviction.
  */
-static multilist_t dbuf_cache;
+static multilist_t *dbuf_cache;
 static refcount_t dbuf_cache_size;
 unsigned long  dbuf_cache_max_bytes = 100 * 1024 * 1024;
 
@@ -169,7 +167,6 @@ dbuf_cons(void *vdb, void *unused, int kmflag)
        cv_init(&db->db_changed, NULL, CV_DEFAULT, NULL);
        multilist_link_init(&db->db_cache_link);
        refcount_create(&db->db_holds);
-       multilist_link_init(&db->db_cache_link);
 
        return (0);
 }
@@ -382,7 +379,6 @@ static void
 dbuf_evict_user(dmu_buf_impl_t *db)
 {
        dmu_buf_user_t *dbu = db->db_user;
-       boolean_t has_async;
 
        ASSERT(MUTEX_HELD(&db->db_mtx));
 
@@ -407,7 +403,7 @@ dbuf_evict_user(dmu_buf_impl_t *db)
         * containing the dbu.  In that case we need to take care to not
         * dereference dbu after calling the sync evict func.
         */
-       has_async = (dbu->dbu_evict_func_async != NULL);
+       boolean_t has_async = (dbu->dbu_evict_func_async != NULL);
 
        if (dbu->dbu_evict_func_sync != NULL)
                dbu->dbu_evict_func_sync(dbu);
@@ -467,24 +463,35 @@ dbuf_cache_multilist_index_func(multilist_t *ml, void *obj)
            multilist_get_num_sublists(ml));
 }
 
+static inline unsigned long
+dbuf_cache_target_bytes(void)
+{
+       return (MIN(dbuf_cache_max_bytes,
+           arc_target_bytes() >> dbuf_cache_max_shift));
+}
+
 static inline boolean_t
 dbuf_cache_above_hiwater(void)
 {
+       uint64_t dbuf_cache_target = dbuf_cache_target_bytes();
+
        uint64_t dbuf_cache_hiwater_bytes =
-           (dbuf_cache_max_bytes * dbuf_cache_hiwater_pct) / 100;
+           (dbuf_cache_target * dbuf_cache_hiwater_pct) / 100;
 
        return (refcount_count(&dbuf_cache_size) >
-           dbuf_cache_max_bytes + dbuf_cache_hiwater_bytes);
+           dbuf_cache_target + dbuf_cache_hiwater_bytes);
 }
 
 static inline boolean_t
 dbuf_cache_above_lowater(void)
 {
+       uint64_t dbuf_cache_target = dbuf_cache_target_bytes();
+
        uint64_t dbuf_cache_lowater_bytes =
-           (dbuf_cache_max_bytes * dbuf_cache_lowater_pct) / 100;
+           (dbuf_cache_target * dbuf_cache_lowater_pct) / 100;
 
        return (refcount_count(&dbuf_cache_size) >
-           dbuf_cache_max_bytes - dbuf_cache_lowater_bytes);
+           dbuf_cache_target - dbuf_cache_lowater_bytes);
 }
 
 /*
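
To make the rescaled watermarks concrete, here is a minimal standalone sketch of the arithmetic above (the 10% hi/lowater percentages and the shift of 5 are assumed defaults; the 1 GiB ARC target is purely illustrative):

#include <stdio.h>
#include <stdint.h>

#define MIN(a, b)       ((a) < (b) ? (a) : (b))

int
main(void)
{
        uint64_t cache_max = 100ULL << 20;  /* dbuf_cache_max_bytes: 100 MiB */
        uint64_t arc_target = 1ULL << 30;   /* assumed ARC target: 1 GiB */
        int max_shift = 5;                  /* assumed dbuf_cache_max_shift */
        uint64_t hi_pct = 10, lo_pct = 10;  /* assumed watermark defaults */

        /* The cache target now tracks the ARC instead of sitting at the cap. */
        uint64_t target = MIN(cache_max, arc_target >> max_shift); /* 32 MiB */

        printf("evict above %llu bytes, down to %llu bytes\n",
            (unsigned long long)(target + (target * hi_pct) / 100), /* ~35.2 MiB */
            (unsigned long long)(target - (target * lo_pct) / 100)); /* ~28.8 MiB */
        return (0);
}
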
@@ -493,9 +500,9 @@ dbuf_cache_above_lowater(void)
 static void
 dbuf_evict_one(void)
 {
-       int idx = multilist_get_random_index(&dbuf_cache);
-       multilist_sublist_t *mls = multilist_sublist_lock(&dbuf_cache, idx);
-       dmu_buf_impl_t *db;
+       int idx = multilist_get_random_index(dbuf_cache);
+       multilist_sublist_t *mls = multilist_sublist_lock(dbuf_cache, idx);
+
        ASSERT(!MUTEX_HELD(&dbuf_evict_lock));
 
        /*
@@ -506,7 +513,7 @@ dbuf_evict_one(void)
        ASSERT3P(tsd_get(zfs_dbuf_evict_key), ==, NULL);
        (void) tsd_set(zfs_dbuf_evict_key, (void *)B_TRUE);
 
-       db = multilist_sublist_tail(mls);
+       dmu_buf_impl_t *db = multilist_sublist_tail(mls);
        while (db != NULL && mutex_tryenter(&db->db_mtx) == 0) {
                db = multilist_sublist_prev(mls, db);
        }
@@ -533,8 +540,9 @@ dbuf_evict_one(void)
  * of the dbuf cache is at or below the maximum size. Once the dbuf is aged
  * out of the cache it is destroyed and becomes eligible for arc eviction.
  */
+/* ARGSUSED */
 static void
-dbuf_evict_thread(void)
+dbuf_evict_thread(void *unused)
 {
        callb_cpr_t cpr;
 
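
For context, kernel threads receive a single void * argument, which is why the entry point grows an unused parameter and an /* ARGSUSED */ annotation for lint. A hedged sketch of how such a thread is started (the matching thread_create() call lives in dbuf_init() and is not part of this excerpt):

        dbuf_cache_evict_thread = thread_create(NULL, 0, dbuf_evict_thread,
            NULL, 0, &p0, TS_RUN, minclsyspri);
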
@@ -599,19 +607,15 @@ dbuf_evict_notify(void)
        if (tsd_get(zfs_dbuf_evict_key) != NULL)
                return;
 
-       if (refcount_count(&dbuf_cache_size) > dbuf_cache_max_bytes) {
-               boolean_t evict_now = B_FALSE;
-
-               mutex_enter(&dbuf_evict_lock);
-               if (refcount_count(&dbuf_cache_size) > dbuf_cache_max_bytes) {
-                       evict_now = dbuf_cache_above_hiwater();
-                       cv_signal(&dbuf_evict_cv);
-               }
-               mutex_exit(&dbuf_evict_lock);
-
-               if (evict_now) {
+       /*
+        * We check if we should evict without holding the dbuf_evict_lock,
+        * because it's OK to occasionally make the wrong decision here,
+        * and grabbing the lock results in massive lock contention.
+        */
+       if (refcount_count(&dbuf_cache_size) > dbuf_cache_target_bytes()) {
+               if (dbuf_cache_above_hiwater())
                        dbuf_evict_one();
-               }
+               cv_signal(&dbuf_evict_cv);
        }
 }
 
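
The rewritten notify path is an instance of a broader pattern: read a shared counter without its lock because an occasionally stale answer is harmless, and pay for synchronization only on the slow path. A minimal pthreads sketch of the same idea (all names hypothetical):

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t cache_bytes;
static pthread_cond_t evict_cv = PTHREAD_COND_INITIALIZER;

static void
cache_grow_notify(uint64_t target)
{
        /*
         * Intentionally racy read: a stale value can at worst cause a
         * missed or spurious wakeup, which the next caller corrects;
         * correctness never depends on this check being exact.
         */
        if (atomic_load(&cache_bytes) > target)
                pthread_cond_signal(&evict_cv);
}
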
@@ -665,7 +669,7 @@ retry:
         * dbuf cache to 1/32nd (default) of the size of the ARC.
         */
        dbuf_cache_max_bytes = MIN(dbuf_cache_max_bytes,
-           arc_max_bytes() >> dbuf_cache_max_shift);
+           arc_target_bytes() >> dbuf_cache_max_shift);
 
        /*
         * All entries are queued via taskq_dispatch_ent(), so min/maxalloc
@@ -673,9 +677,8 @@ retry:
         */
        dbu_evict_taskq = taskq_create("dbu_evict", 1, defclsyspri, 0, 0, 0);
 
-       multilist_create(&dbuf_cache, sizeof (dmu_buf_impl_t),
+       dbuf_cache = multilist_create(sizeof (dmu_buf_impl_t),
            offsetof(dmu_buf_impl_t, db_cache_link),
-           zfs_arc_num_sublists_per_state,
            dbuf_cache_multilist_index_func);
        refcount_create(&dbuf_cache_size);
 
@@ -722,7 +725,7 @@ dbuf_fini(void)
        cv_destroy(&dbuf_evict_cv);
 
        refcount_destroy(&dbuf_cache_size);
-       multilist_destroy(&dbuf_cache);
+       multilist_destroy(dbuf_cache);
 }
 
 /*
@@ -841,7 +844,6 @@ dbuf_verify(dmu_buf_impl_t *db)
                                        ASSERT(buf[i] == 0);
                                }
                        } else {
-                               int i;
                                blkptr_t *bps = db->db.db_data;
                                ASSERT3U(1 << DB_DNODE(db)->dn_indblkshift, ==,
                                    db->db.db_size);
@@ -852,7 +854,7 @@ dbuf_verify(dmu_buf_impl_t *db)
                                 * We iterate through each blkptr and verify
                                 * they only have those fields set.
                                 */
-                               for (i = 0;
+                               for (int i = 0;
                                    i < db->db.db_size / sizeof (blkptr_t);
                                    i++) {
                                        blkptr_t *bp = &bps[i];
@@ -972,7 +974,7 @@ dbuf_whichblock(const dnode_t *dn, const int64_t level, const uint64_t offset)
 }
 
 static void
-dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb)
+dbuf_read_done(zio_t *zio, int err, arc_buf_t *buf, void *vdb)
 {
        dmu_buf_impl_t *db = vdb;
 
@@ -992,7 +994,7 @@ dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb)
                db->db_freed_in_flight = FALSE;
                dbuf_set_data(db, buf);
                db->db_state = DB_CACHED;
-       } else if (zio == NULL || zio->io_error == 0) {
+       } else if (err == 0) {
                dbuf_set_data(db, buf);
                db->db_state = DB_CACHED;
        } else {
@@ -1011,7 +1013,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
        dnode_t *dn;
        zbookmark_phys_t zb;
        uint32_t aflags = ARC_FLAG_NOWAIT;
-       int err;
+       int err, zio_flags = 0;
 
        DB_DNODE_ENTER(db);
        dn = DB_DNODE(db);
@@ -1029,6 +1031,22 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
                 */
                int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
                int max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
+               arc_buf_t *dn_buf = (dn->dn_dbuf != NULL) ?
+                   dn->dn_dbuf->db_buf : NULL;
+
+               /* if the underlying dnode block is encrypted, decrypt it */
+               if (dn_buf != NULL && dn->dn_objset->os_encrypted &&
+                   DMU_OT_IS_ENCRYPTED(dn->dn_bonustype) &&
+                   (flags & DB_RF_NO_DECRYPT) == 0 &&
+                   arc_is_encrypted(dn_buf)) {
+                       err = arc_untransform(dn_buf, dn->dn_objset->os_spa,
+                           dmu_objset_id(dn->dn_objset), B_TRUE);
+                       if (err != 0) {
+                               DB_DNODE_EXIT(db);
+                               mutex_exit(&db->db_mtx);
+                               return (err);
+                       }
+               }
 
                ASSERT3U(bonuslen, <=, db->db.db_size);
                db->db.db_data = kmem_alloc(max_bonuslen, KM_SLEEP);
@@ -1061,8 +1079,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
                    BP_IS_HOLE(db->db_blkptr) &&
                    db->db_blkptr->blk_birth != 0) {
                        blkptr_t *bps = db->db.db_data;
-                       int i;
-                       for (i = 0; i < ((1 <<
+                       for (int i = 0; i < ((1 <<
                            DB_DNODE(db)->dn_indblkshift) / sizeof (blkptr_t));
                            i++) {
                                blkptr_t *bp = &bps[i];
@@ -1096,11 +1113,27 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
            db->db_objset->os_dsl_dataset->ds_object : DMU_META_OBJSET,
            db->db.db_object, db->db_level, db->db_blkid);
 
+       /*
+        * All bps of an encrypted os should have the encryption bit set.
+        * If this is not true it indicates tampering and we report an error.
+        */
+       if (db->db_objset->os_encrypted && !BP_USES_CRYPT(db->db_blkptr)) {
+               spa_log_error(db->db_objset->os_spa, &zb);
+               zfs_panic_recover("unencrypted block in encrypted "
+                   "object set %llu", dmu_objset_id(db->db_objset));
+               return (SET_ERROR(EIO));
+       }
+
        dbuf_add_ref(db, NULL);
 
+       zio_flags = (flags & DB_RF_CANFAIL) ?
+           ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED;
+
+       if ((flags & DB_RF_NO_DECRYPT) && BP_IS_PROTECTED(db->db_blkptr))
+               zio_flags |= ZIO_FLAG_RAW;
+
        err = arc_read(zio, db->db_objset->os_spa, db->db_blkptr,
-           dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
-           (flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
+           dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, zio_flags,
            &aflags, &zb);
 
        return (err);
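
From the caller's side, the new flag plumbing composes like this — a hypothetical read that wants the raw, still-encrypted bytes (only flags that appear in this diff are used):

        err = dbuf_read(db, NULL,
            DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH | DB_RF_NO_DECRYPT);

With DB_RF_NO_DECRYPT set and a protected bp, the zio is issued with ZIO_FLAG_RAW, so the ARC buffer keeps the on-disk (encrypted, possibly compressed) form instead of plaintext.
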
@@ -1141,7 +1174,7 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
         * or (if there are no active holders)
         *      just null out the current db_data pointer.
         */
-       ASSERT(dr->dr_txg >= txg - 2);
+       ASSERT3U(dr->dr_txg, >=, txg - 2);
        if (db->db_blkid == DMU_BONUS_BLKID) {
                dnode_t *dn = DB_DNODE(db);
                int bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
@@ -1149,18 +1182,31 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
                arc_space_consume(bonuslen, ARC_SPACE_BONUS);
                bcopy(db->db.db_data, dr->dt.dl.dr_data, bonuslen);
        } else if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
+               dnode_t *dn = DB_DNODE(db);
                int size = arc_buf_size(db->db_buf);
                arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
                spa_t *spa = db->db_objset->os_spa;
                enum zio_compress compress_type =
                    arc_get_compression(db->db_buf);
 
-               if (compress_type == ZIO_COMPRESS_OFF) {
-                       dr->dt.dl.dr_data = arc_alloc_buf(spa, db, type, size);
-               } else {
+               if (arc_is_encrypted(db->db_buf)) {
+                       boolean_t byteorder;
+                       uint8_t salt[ZIO_DATA_SALT_LEN];
+                       uint8_t iv[ZIO_DATA_IV_LEN];
+                       uint8_t mac[ZIO_DATA_MAC_LEN];
+
+                       arc_get_raw_params(db->db_buf, &byteorder, salt,
+                           iv, mac);
+                       dr->dt.dl.dr_data = arc_alloc_raw_buf(spa, db,
+                           dmu_objset_id(dn->dn_objset), byteorder, salt, iv,
+                           mac, dn->dn_type, size, arc_buf_lsize(db->db_buf),
+                           compress_type);
+               } else if (compress_type != ZIO_COMPRESS_OFF) {
                        ASSERT3U(type, ==, ARC_BUFC_DATA);
                        dr->dt.dl.dr_data = arc_alloc_compressed_buf(spa, db,
                            size, arc_buf_lsize(db->db_buf), compress_type);
+               } else {
+                       dr->dt.dl.dr_data = arc_alloc_buf(spa, db, type, size);
                }
                bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
        } else {
@@ -1173,7 +1219,6 @@ int
 dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
 {
        int err = 0;
-       boolean_t havepzio = (zio != NULL);
        boolean_t prefetch;
        dnode_t *dn;
 
@@ -1197,16 +1242,21 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
 
        mutex_enter(&db->db_mtx);
        if (db->db_state == DB_CACHED) {
+               spa_t *spa = dn->dn_objset->os_spa;
+
                /*
-                * If the arc buf is compressed, we need to decompress it to
-                * read the data. This could happen during the "zfs receive" of
-                * a stream which is compressed and deduplicated.
+                * If the arc buf is compressed or encrypted, we need to
+                * untransform it to read the data. This could happen during
+                * the "zfs receive" of a stream which is deduplicated and
+                * either raw or compressed. We do not need to do this if the
+                * caller wants raw encrypted data.
                 */
-               if (db->db_buf != NULL &&
-                   arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF) {
-                       dbuf_fix_old_data(db,
-                           spa_syncing_txg(dmu_objset_spa(db->db_objset)));
-                       err = arc_decompress(db->db_buf);
+               if (db->db_buf != NULL && (flags & DB_RF_NO_DECRYPT) == 0 &&
+                   (arc_is_encrypted(db->db_buf) ||
+                   arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF)) {
+                       dbuf_fix_old_data(db, spa_syncing_txg(spa));
+                       err = arc_untransform(db->db_buf, spa,
+                           dmu_objset_id(db->db_objset), B_FALSE);
                        dbuf_set_data(db, db->db_buf);
                }
                mutex_exit(&db->db_mtx);
@@ -1217,11 +1267,13 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
                DB_DNODE_EXIT(db);
        } else if (db->db_state == DB_UNCACHED) {
                spa_t *spa = dn->dn_objset->os_spa;
+               boolean_t need_wait = B_FALSE;
 
                if (zio == NULL &&
-                   db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr))
+                   db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)) {
                        zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
-
+                       need_wait = B_TRUE;
+               }
                err = dbuf_read_impl(db, zio, flags);
 
                /* dbuf_read_impl has dropped db_mtx for us */
@@ -1233,7 +1285,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
                        rw_exit(&dn->dn_struct_rwlock);
                DB_DNODE_EXIT(db);
 
-               if (!err && !havepzio && zio != NULL)
+               if (!err && need_wait)
                        err = zio_wait(zio);
        } else {
                /*
@@ -1268,7 +1320,6 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
                mutex_exit(&db->db_mtx);
        }
 
-       ASSERT(err || havepzio || db->db_state == DB_CACHED);
        return (err);
 }
 
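
The need_wait bookkeeping preserves both calling conventions; a hedged sketch of the two modes, mirroring the zio_root()/zio_wait() pairing visible in this hunk (error handling elided):

        /* Synchronous: dbuf_read() creates and waits on its own root zio. */
        err = dbuf_read(db, NULL, DB_RF_CANFAIL);

        /* Batched: the caller owns the parent zio and waits once for all reads. */
        zio_t *pio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
        err = dbuf_read(db1, pio, DB_RF_CANFAIL);
        err = dbuf_read(db2, pio, DB_RF_CANFAIL);
        err = zio_wait(pio);
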
@@ -1304,6 +1355,11 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
        uint64_t txg = dr->dr_txg;
 
        ASSERT(MUTEX_HELD(&db->db_mtx));
+       /*
+        * This assert is valid because dmu_sync() expects to be called by
+        * a zilog's get_data while holding a range lock.  This call only
+        * comes from dbuf_dirty() callers who must also hold a range lock.
+        */
        ASSERT(dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC);
        ASSERT(db->db_level == 0);
 
@@ -1319,6 +1375,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
 
        dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
        dr->dt.dl.dr_nopwrite = B_FALSE;
+       dr->dt.dl.dr_raw = B_FALSE;
 
        /*
         * Release the already-written buffer, so we leave it in
@@ -1435,41 +1492,6 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
        mutex_exit(&dn->dn_dbufs_mtx);
 }
 
-static int
-dbuf_block_freeable(dmu_buf_impl_t *db)
-{
-       dsl_dataset_t *ds = db->db_objset->os_dsl_dataset;
-       uint64_t birth_txg = 0;
-
-       /*
-        * We don't need any locking to protect db_blkptr:
-        * If it's syncing, then db_last_dirty will be set
-        * so we'll ignore db_blkptr.
-        *
-        * This logic ensures that only block births for
-        * filled blocks are considered.
-        */
-       ASSERT(MUTEX_HELD(&db->db_mtx));
-       if (db->db_last_dirty && (db->db_blkptr == NULL ||
-           !BP_IS_HOLE(db->db_blkptr))) {
-               birth_txg = db->db_last_dirty->dr_txg;
-       } else if (db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)) {
-               birth_txg = db->db_blkptr->blk_birth;
-       }
-
-       /*
-        * If this block don't exist or is in a snapshot, it can't be freed.
-        * Don't pass the bp to dsl_dataset_block_freeable() since we
-        * are holding the db_mtx lock and might deadlock if we are
-        * prefetching a dedup-ed block.
-        */
-       if (birth_txg != 0)
-               return (ds == NULL ||
-                   dsl_dataset_block_freeable(ds, NULL, birth_txg));
-       else
-               return (B_FALSE);
-}
-
 void
 dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
 {
@@ -1519,7 +1541,7 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
        }
        mutex_exit(&db->db_mtx);
 
-       dnode_willuse_space(dn, size-osize, tx);
+       dmu_objset_willuse_space(dn->dn_objset, size - osize, tx);
        DB_DNODE_EXIT(db);
 }
 
@@ -1569,7 +1591,6 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
        objset_t *os;
        dbuf_dirty_record_t **drp, *dr;
        int drop_struct_lock = FALSE;
-       boolean_t do_free_accounting = B_FALSE;
        int txgoff = tx->tx_txg & TXG_MASK;
 
        ASSERT(tx->tx_txg != 0);
@@ -1583,10 +1604,18 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
         * objects may be dirtied in syncing context, but only if they
         * were already pre-dirtied in open context.
         */
+#ifdef DEBUG
+       if (dn->dn_objset->os_dsl_dataset != NULL) {
+               rrw_enter(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock,
+                   RW_READER, FTAG);
+       }
        ASSERT(!dmu_tx_is_syncing(tx) ||
            BP_IS_HOLE(dn->dn_objset->os_rootbp) ||
            DMU_OBJECT_IS_SPECIAL(dn->dn_object) ||
            dn->dn_objset->os_dsl_dataset == NULL);
+       if (dn->dn_objset->os_dsl_dataset != NULL)
+               rrw_exit(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock, FTAG);
+#endif
        /*
         * We make this assert for private objects as well, but after we
         * check if we're already dirty.  They are allowed to re-dirty
@@ -1611,12 +1640,21 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
         * Don't set dirtyctx to SYNC if we're just modifying this as we
         * initialize the objset.
         */
-       if (dn->dn_dirtyctx == DN_UNDIRTIED &&
-           !BP_IS_HOLE(dn->dn_objset->os_rootbp)) {
-               dn->dn_dirtyctx =
-                   (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN);
-               ASSERT(dn->dn_dirtyctx_firstset == NULL);
-               dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP);
+       if (dn->dn_dirtyctx == DN_UNDIRTIED) {
+               if (dn->dn_objset->os_dsl_dataset != NULL) {
+                       rrw_enter(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock,
+                           RW_READER, FTAG);
+               }
+               if (!BP_IS_HOLE(dn->dn_objset->os_rootbp)) {
+                       dn->dn_dirtyctx = (dmu_tx_is_syncing(tx) ?
+                           DN_DIRTY_SYNC : DN_DIRTY_OPEN);
+                       ASSERT(dn->dn_dirtyctx_firstset == NULL);
+                       dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP);
+               }
+               if (dn->dn_objset->os_dsl_dataset != NULL) {
+                       rrw_exit(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock,
+                           FTAG);
+               }
        }
        mutex_exit(&dn->dn_mtx);
 
@@ -1647,11 +1685,6 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
            (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN));
 
        ASSERT3U(dn->dn_nlevels, >, db->db_level);
-       ASSERT((dn->dn_phys->dn_nlevels == 0 && db->db_level == 0) ||
-           dn->dn_phys->dn_nlevels > db->db_level ||
-           dn->dn_next_nlevels[txgoff] > db->db_level ||
-           dn->dn_next_nlevels[(tx->tx_txg-1) & TXG_MASK] > db->db_level ||
-           dn->dn_next_nlevels[(tx->tx_txg-2) & TXG_MASK] > db->db_level);
 
        /*
         * We should only be dirtying in syncing context if it's the
@@ -1661,22 +1694,21 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
         * this assertion only if we're not already dirty.
         */
        os = dn->dn_objset;
+       VERIFY3U(tx->tx_txg, <=, spa_final_dirty_txg(os->os_spa));
+#ifdef DEBUG
+       if (dn->dn_objset->os_dsl_dataset != NULL)
+               rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_READER, FTAG);
        ASSERT(!dmu_tx_is_syncing(tx) || DMU_OBJECT_IS_SPECIAL(dn->dn_object) ||
            os->os_dsl_dataset == NULL || BP_IS_HOLE(os->os_rootbp));
+       if (dn->dn_objset->os_dsl_dataset != NULL)
+               rrw_exit(&os->os_dsl_dataset->ds_bp_rwlock, FTAG);
+#endif
        ASSERT(db->db.db_size != 0);
 
        dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
 
        if (db->db_blkid != DMU_BONUS_BLKID) {
-               /*
-                * Update the accounting.
-                * Note: we delay "free accounting" until after we drop
-                * the db_mtx.  This keeps us from grabbing other locks
-                * (and possibly deadlocking) in bp_get_dsize() while
-                * also holding the db_mtx.
-                */
-               dnode_willuse_space(dn, db->db.db_size, tx);
-               do_free_accounting = dbuf_block_freeable(db);
+               dmu_objset_willuse_space(os, db->db.db_size, tx);
        }
 
        /*
@@ -1770,21 +1802,23 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
                drop_struct_lock = TRUE;
        }
 
-       if (do_free_accounting) {
-               blkptr_t *bp = db->db_blkptr;
-               int64_t willfree = (bp && !BP_IS_HOLE(bp)) ?
-                   bp_get_dsize(os->os_spa, bp) : db->db.db_size;
-               /*
-                * This is only a guess -- if the dbuf is dirty
-                * in a previous txg, we don't know how much
-                * space it will use on disk yet.  We should
-                * really have the struct_rwlock to access
-                * db_blkptr, but since this is just a guess,
-                * it's OK if we get an odd answer.
-                */
-               ddt_prefetch(os->os_spa, bp);
-               dnode_willuse_space(dn, -willfree, tx);
-       }
+       /*
+        * We need to hold the dn_struct_rwlock to make this assertion,
+        * because it protects dn_phys / dn_next_nlevels from changing.
+        */
+       ASSERT((dn->dn_phys->dn_nlevels == 0 && db->db_level == 0) ||
+           dn->dn_phys->dn_nlevels > db->db_level ||
+           dn->dn_next_nlevels[txgoff] > db->db_level ||
+           dn->dn_next_nlevels[(tx->tx_txg-1) & TXG_MASK] > db->db_level ||
+           dn->dn_next_nlevels[(tx->tx_txg-2) & TXG_MASK] > db->db_level);
+
+       /*
+        * If we are overwriting a dedup BP, then unless it is snapshotted,
+        * when we get to syncing context we will need to decrement its
+        * refcount in the DDT.  Prefetch the relevant DDT block so that
+        * syncing context won't have to wait for the i/o.
+        */
+       ddt_prefetch(os->os_spa, db->db_blkptr);
 
        if (db->db_level == 0) {
                dnode_new_blkid(dn, db->db_blkid, tx, drop_struct_lock);
@@ -1934,12 +1968,10 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
        return (B_FALSE);
 }
 
-void
-dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
+static void
+dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx)
 {
        dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
-       int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH;
-       dbuf_dirty_record_t *dr;
 
        ASSERT(tx->tx_txg != 0);
        ASSERT(!refcount_is_zero(&db->db_holds));
@@ -1952,6 +1984,7 @@ dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
         */
        mutex_enter(&db->db_mtx);
 
+       dbuf_dirty_record_t *dr;
        for (dr = db->db_last_dirty;
            dr != NULL && dr->dr_txg >= tx->tx_txg; dr = dr->dr_next) {
                /*
@@ -1970,12 +2003,19 @@ dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
 
        DB_DNODE_ENTER(db);
        if (RW_WRITE_HELD(&DB_DNODE(db)->dn_struct_rwlock))
-               rf |= DB_RF_HAVESTRUCT;
+               flags |= DB_RF_HAVESTRUCT;
        DB_DNODE_EXIT(db);
-       (void) dbuf_read(db, NULL, rf);
+       (void) dbuf_read(db, NULL, flags);
        (void) dbuf_dirty(db, tx);
 }
 
+void
+dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
+{
+       dmu_buf_will_dirty_impl(db_fake,
+           DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH, tx);
+}
+
 void
 dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
 {
@@ -2003,6 +2043,29 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
        (void) dbuf_dirty(db, tx);
 }
 
+/*
+ * This function is effectively the same as dmu_buf_will_dirty(), but
+ * indicates the caller expects raw encrypted data in the db. It will
+ * also set the raw flag on the created dirty record.
+ */
+void
+dmu_buf_will_change_crypt_params(dmu_buf_t *db_fake, dmu_tx_t *tx)
+{
+       dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
+       dbuf_dirty_record_t *dr;
+
+       dmu_buf_will_dirty_impl(db_fake,
+           DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH | DB_RF_NO_DECRYPT, tx);
+
+       dr = db->db_last_dirty;
+       while (dr != NULL && dr->dr_txg > tx->tx_txg)
+               dr = dr->dr_next;
+
+       ASSERT3P(dr, !=, NULL);
+       ASSERT3U(dr->dr_txg, ==, tx->tx_txg);
+       dr->dt.dl.dr_raw = B_TRUE;
+}
+
 #pragma weak dmu_buf_fill_done = dbuf_fill_done
 /* ARGSUSED */
 void
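
A hypothetical caller, to show the contract: the dbuf is dirtied through this function inside an assigned tx before raw ciphertext is copied in, so that the dr_raw flag is in place when dbuf_check_crypt() (added further down) inspects the dirty record at sync time:

        dmu_buf_will_change_crypt_params(db_fake, tx);
        /* ... now fill the buffer with already-encrypted (raw) contents ... */
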
@@ -2089,6 +2152,13 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
 
        if (db->db_state == DB_CACHED &&
            refcount_count(&db->db_holds) - 1 > db->db_dirtycnt) {
+               /*
+                * In practice, we will never have a case where we have an
+                * encrypted arc buffer while additional holds exist on the
+                * dbuf. We don't handle this here so we simply assert that
+                * fact instead.
+                */
+               ASSERT(!arc_is_encrypted(buf));
                mutex_exit(&db->db_mtx);
                (void) dbuf_dirty(db, tx);
                bcopy(buf->b_data, db->db.db_data, db->db.db_size);
@@ -2104,6 +2174,8 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
                ASSERT(db->db_buf != NULL);
                if (dr != NULL && dr->dr_txg == tx->tx_txg) {
                        ASSERT(dr->dt.dl.dr_data == db->db_buf);
+                       IMPLY(arc_is_encrypted(buf), dr->dt.dl.dr_raw);
+
                        if (!arc_released(db->db_buf)) {
                                ASSERT(dr->dt.dl.dr_override_state ==
                                    DR_OVERRIDDEN);
@@ -2143,16 +2215,17 @@ dbuf_destroy(dmu_buf_impl_t *db)
        if (db->db_blkid == DMU_BONUS_BLKID) {
                int slots = DB_DNODE(db)->dn_num_slots;
                int bonuslen = DN_SLOTS_TO_BONUSLEN(slots);
-               ASSERT(db->db.db_data != NULL);
-               kmem_free(db->db.db_data, bonuslen);
-               arc_space_return(bonuslen, ARC_SPACE_BONUS);
-               db->db_state = DB_UNCACHED;
+               if (db->db.db_data != NULL) {
+                       kmem_free(db->db.db_data, bonuslen);
+                       arc_space_return(bonuslen, ARC_SPACE_BONUS);
+                       db->db_state = DB_UNCACHED;
+               }
        }
 
        dbuf_clear_data(db);
 
        if (multilist_link_active(&db->db_cache_link)) {
-               multilist_remove(&dbuf_cache, db);
+               multilist_remove(dbuf_cache, db);
                (void) refcount_remove_many(&dbuf_cache_size,
                    db->db.db_size, db);
        }
@@ -2232,8 +2305,6 @@ static inline int
 dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
     dmu_buf_impl_t **parentp, blkptr_t **bpp, struct dbuf_hold_impl_data *dh)
 {
-       int nlevels, epbs;
-
        *parentp = NULL;
        *bpp = NULL;
 
@@ -2252,9 +2323,9 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
                return (0);
        }
 
-       nlevels =
+       int nlevels =
            (dn->dn_phys->dn_nlevels == 0) ? 1 : dn->dn_phys->dn_nlevels;
-       epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
+       int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
 
        ASSERT3U(level * epbs, <, 64);
        ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
@@ -2422,11 +2493,11 @@ typedef struct dbuf_prefetch_arg {
 static void
 dbuf_issue_final_prefetch(dbuf_prefetch_arg_t *dpa, blkptr_t *bp)
 {
-       arc_flags_t aflags;
        if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
                return;
 
-       aflags = dpa->dpa_aflags | ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
+       arc_flags_t aflags =
+           dpa->dpa_aflags | ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
 
        ASSERT3U(dpa->dpa_curlevel, ==, BP_GET_LEVEL(bp));
        ASSERT3U(dpa->dpa_curlevel, ==, dpa->dpa_zb.zb_level);
@@ -2442,11 +2513,9 @@ dbuf_issue_final_prefetch(dbuf_prefetch_arg_t *dpa, blkptr_t *bp)
  * prefetch if the next block down is our target.
  */
 static void
-dbuf_prefetch_indirect_done(zio_t *zio, arc_buf_t *abuf, void *private)
+dbuf_prefetch_indirect_done(zio_t *zio, int err, arc_buf_t *abuf, void *private)
 {
        dbuf_prefetch_arg_t *dpa = private;
-       uint64_t nextblkid;
-       blkptr_t *bp;
 
        ASSERT3S(dpa->dpa_zb.zb_level, <, dpa->dpa_curlevel);
        ASSERT3S(dpa->dpa_curlevel, >, 0);
@@ -2464,7 +2533,7 @@ dbuf_prefetch_indirect_done(zio_t *zio, arc_buf_t *abuf, void *private)
         */
        if (zio != NULL) {
                ASSERT3S(BP_GET_LEVEL(zio->io_bp), ==, dpa->dpa_curlevel);
-               if (zio->io_flags & ZIO_FLAG_RAW) {
+               if (zio->io_flags & ZIO_FLAG_RAW_COMPRESS) {
                        ASSERT3U(BP_GET_PSIZE(zio->io_bp), ==, zio->io_size);
                } else {
                        ASSERT3U(BP_GET_LSIZE(zio->io_bp), ==, zio->io_size);
@@ -2485,11 +2554,11 @@ dbuf_prefetch_indirect_done(zio_t *zio, arc_buf_t *abuf, void *private)
 
        dpa->dpa_curlevel--;
 
-       nextblkid = dpa->dpa_zb.zb_blkid >>
+       uint64_t nextblkid = dpa->dpa_zb.zb_blkid >>
            (dpa->dpa_epbs * (dpa->dpa_curlevel - dpa->dpa_zb.zb_level));
-       bp = ((blkptr_t *)abuf->b_data) +
+       blkptr_t *bp = ((blkptr_t *)abuf->b_data) +
            P2PHASE(nextblkid, 1ULL << dpa->dpa_epbs);
-       if (BP_IS_HOLE(bp) || (zio != NULL && zio->io_error != 0)) {
+       if (BP_IS_HOLE(bp) || err != 0) {
                kmem_free(dpa, sizeof (*dpa));
        } else if (dpa->dpa_curlevel == dpa->dpa_zb.zb_level) {
                ASSERT3U(nextblkid, ==, dpa->dpa_zb.zb_blkid);
@@ -2517,7 +2586,8 @@ dbuf_prefetch_indirect_done(zio_t *zio, arc_buf_t *abuf, void *private)
  * Issue prefetch reads for the given block on the given level.  If the indirect
  * blocks above that block are not in memory, we will read them in
  * asynchronously.  As a result, this call never blocks waiting for a read to
- * complete.
+ * complete. Note that the prefetch might fail if the dataset is encrypted and
+ * the encryption key is unmapped before the IO completes.
  */
 void
 dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blkid, zio_priority_t prio,
@@ -2526,10 +2596,6 @@ dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blkid, zio_priority_t prio,
        blkptr_t bp;
        int epbs, nlevels, curlevel;
        uint64_t curblkid;
-       dmu_buf_impl_t *db;
-       zio_t *pio;
-       dbuf_prefetch_arg_t *dpa;
-       dsl_dataset_t *ds;
 
        ASSERT(blkid != DMU_BONUS_BLKID);
        ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
@@ -2552,7 +2618,7 @@ dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blkid, zio_priority_t prio,
        if (dn->dn_phys->dn_maxblkid < blkid << (epbs * level))
                return;
 
-       db = dbuf_find(dn->dn_objset, dn->dn_object,
+       dmu_buf_impl_t *db = dbuf_find(dn->dn_objset, dn->dn_object,
            level, blkid);
        if (db != NULL) {
                mutex_exit(&db->db_mtx);
@@ -2597,11 +2663,11 @@ dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blkid, zio_priority_t prio,
 
        ASSERT3U(curlevel, ==, BP_GET_LEVEL(&bp));
 
-       pio = zio_root(dmu_objset_spa(dn->dn_objset), NULL, NULL,
+       zio_t *pio = zio_root(dmu_objset_spa(dn->dn_objset), NULL, NULL,
            ZIO_FLAG_CANFAIL);
 
-       dpa = kmem_zalloc(sizeof (*dpa), KM_SLEEP);
-       ds = dn->dn_objset->os_dsl_dataset;
+       dbuf_prefetch_arg_t *dpa = kmem_zalloc(sizeof (*dpa), KM_SLEEP);
+       dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
        SET_BOOKMARK(&dpa->dpa_zb, ds != NULL ? ds->ds_object : DMU_META_OBJSET,
            dn->dn_object, level, blkid);
        dpa->dpa_curlevel = curlevel;
@@ -2722,7 +2788,7 @@ __dbuf_hold_impl(struct dbuf_hold_impl_data *dh)
 
        if (multilist_link_active(&dh->dh_db->db_cache_link)) {
                ASSERT(refcount_is_zero(&dh->dh_db->db_holds));
-               multilist_remove(&dbuf_cache, dh->dh_db);
+               multilist_remove(dbuf_cache, dh->dh_db);
                (void) refcount_remove_many(&dbuf_cache_size,
                    dh->dh_db->db.db_size, dh->dh_db);
        }
@@ -2994,7 +3060,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
                            db->db_pending_evict) {
                                dbuf_destroy(db);
                        } else if (!multilist_link_active(&db->db_cache_link)) {
-                               multilist_insert(&dbuf_cache, db);
+                               multilist_insert(dbuf_cache, db);
                                (void) refcount_add_many(&dbuf_cache_size,
                                    db->db.db_size, db);
                                mutex_exit(&db->db_mtx);
@@ -3072,19 +3138,6 @@ dmu_buf_user_evict_wait()
        taskq_wait(dbu_evict_taskq);
 }
 
-boolean_t
-dmu_buf_freeable(dmu_buf_t *dbuf)
-{
-       boolean_t res = B_FALSE;
-       dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf;
-
-       if (db->db_blkptr)
-               res = dsl_dataset_block_freeable(db->db_objset->os_dsl_dataset,
-                   db->db_blkptr, db->db_blkptr->blk_birth);
-
-       return (res);
-}
-
 blkptr_t *
 dmu_buf_get_blkptr(dmu_buf_t *db)
 {
@@ -3159,6 +3212,41 @@ dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db)
        }
 }
 
+/*
+ * Ensure the dbuf's data is untransformed if the associated dirty
+ * record requires it. This is used by dbuf_sync_leaf() to ensure
+ * that a dnode block is decrypted before we write new data to it.
+ * For raw writes we assert that the buffer is already encrypted.
+ */
+static void
+dbuf_check_crypt(dbuf_dirty_record_t *dr)
+{
+       int err;
+       dmu_buf_impl_t *db = dr->dr_dbuf;
+
+       ASSERT(MUTEX_HELD(&db->db_mtx));
+
+       if (!dr->dt.dl.dr_raw && arc_is_encrypted(db->db_buf)) {
+               /*
+                * Unfortunately, there is currently no mechanism for
+                * syncing context to handle decryption errors. An error
+                * here is only possible if an attacker maliciously
+                * changed a dnode block and updated the associated
+                * checksums going up the block tree.
+                */
+               err = arc_untransform(db->db_buf, db->db_objset->os_spa,
+                   dmu_objset_id(db->db_objset), B_TRUE);
+               if (err)
+                       panic("Invalid dnode block MAC");
+       } else if (dr->dt.dl.dr_raw) {
+               /*
+                * Writing raw encrypted data requires the db's arc buffer
+                * to be converted to raw by the caller.
+                */
+               ASSERT(arc_is_encrypted(db->db_buf));
+       }
+}
+
 /*
  * dbuf_sync_indirect() is called recursively from dbuf_sync_list() so it
  * is critical that we not allow the compiler to inline this function into
@@ -3280,9 +3368,10 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
 
                ASSERT(*datap != NULL);
                ASSERT0(db->db_level);
-               ASSERT3U(dn->dn_phys->dn_bonuslen, <=,
+               ASSERT3U(DN_MAX_BONUS_LEN(dn->dn_phys), <=,
                    DN_SLOTS_TO_BONUSLEN(dn->dn_phys->dn_extra_slots + 1));
-               bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen);
+               bcopy(*datap, DN_BONUS(dn->dn_phys),
+                   DN_MAX_BONUS_LEN(dn->dn_phys));
                DB_DNODE_EXIT(db);
 
                if (*datap != db->db.db_data) {
@@ -3329,6 +3418,13 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
                ASSERT(dr->dt.dl.dr_override_state != DR_NOT_OVERRIDDEN);
        }
 
+       /*
+        * If this is a dnode block, ensure it is appropriately encrypted
+        * or decrypted, depending on what we are writing to it this txg.
+        */
+       if (os->os_encrypted && dn->dn_object == DMU_META_DNODE_OBJECT)
+               dbuf_check_crypt(dr);
+
        if (db->db_state != DB_NOFILL &&
            dn->dn_object != DMU_META_DNODE_OBJECT &&
            refcount_count(&db->db_holds) > 1 &&
@@ -3346,16 +3442,26 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
                 * DNODE blocks).
                 */
                int psize = arc_buf_size(*datap);
+               int lsize = arc_buf_lsize(*datap);
                arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
                enum zio_compress compress_type = arc_get_compression(*datap);
 
-               if (compress_type == ZIO_COMPRESS_OFF) {
-                       *datap = arc_alloc_buf(os->os_spa, db, type, psize);
-               } else {
-                       int lsize = arc_buf_lsize(*datap);
+               if (arc_is_encrypted(*datap)) {
+                       boolean_t byteorder;
+                       uint8_t salt[ZIO_DATA_SALT_LEN];
+                       uint8_t iv[ZIO_DATA_IV_LEN];
+                       uint8_t mac[ZIO_DATA_MAC_LEN];
+
+                       arc_get_raw_params(*datap, &byteorder, salt, iv, mac);
+                       *datap = arc_alloc_raw_buf(os->os_spa, db,
+                           dmu_objset_id(os), byteorder, salt, iv, mac,
+                           dn->dn_type, psize, lsize, compress_type);
+               } else if (compress_type != ZIO_COMPRESS_OFF) {
                        ASSERT3U(type, ==, ARC_BUFC_DATA);
                        *datap = arc_alloc_compressed_buf(os->os_spa, db,
                            psize, lsize, compress_type);
+               } else {
+                       *datap = arc_alloc_buf(os->os_spa, db, type, psize);
                }
                bcopy(db->db.db_data, (*datap)->b_data, psize);
        }
@@ -3463,7 +3569,8 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
                if (dn->dn_type == DMU_OT_DNODE) {
                        i = 0;
                        while (i < db->db.db_size) {
-                               dnode_phys_t *dnp = db->db.db_data + i;
+                               dnode_phys_t *dnp =
+                                   (void *)(((char *)db->db.db_data) + i);
 
                                i += DNODE_MIN_SIZE;
                                if (dnp->dn_type != DMU_OT_NONE) {
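
The added cast is not cosmetic: pointer arithmetic on void * is a GNU extension, so portable ISO C needs a byte-sized pointer type before indexing. Reduced to its essentials:

        char *base = db->db.db_data;                    /* byte-addressable view */
        dnode_phys_t *dnp = (dnode_phys_t *)(base + i); /* i is a byte offset */
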
@@ -3491,7 +3598,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
        DB_DNODE_EXIT(db);
 
        if (!BP_IS_EMBEDDED(bp))
-               bp->blk_fill = fill;
+               BP_SET_FILL(bp, fill);
 
        mutex_exit(&db->db_mtx);
 
@@ -3514,13 +3621,13 @@ dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
        dmu_buf_impl_t *db = vdb;
        dnode_t *dn;
        blkptr_t *bp;
-       uint64_t i;
-       int epbs;
+       unsigned int epbs, i;
 
        ASSERT3U(db->db_level, >, 0);
        DB_DNODE_ENTER(db);
        dn = DB_DNODE(db);
        epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
+       ASSERT3U(epbs, <, 31);
 
        /* Determine if all our children are holes */
        for (i = 0, bp = db->db.db_data; i < 1ULL << epbs; i++, bp++) {
@@ -3533,8 +3640,14 @@ dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
         * we may get compressed away.
         */
        if (i == 1ULL << epbs) {
-               /* didn't find any non-holes */
+               /*
+                * We only found holes. Grab the rwlock to prevent
+                * anybody from reading the blocks we're about to
+                * zero out.
+                */
+               rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
                bzero(db->db.db_data, db->db.db_size);
+               rw_exit(&dn->dn_struct_rwlock);
        }
        DB_DNODE_EXIT(db);
 }
@@ -3770,9 +3883,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
                wp_flag = WP_SPILL;
        wp_flag |= (db->db_state == DB_NOFILL) ? WP_NOFILL : 0;
 
-       dmu_write_policy(os, dn, db->db_level, wp_flag,
-           (data != NULL && arc_get_compression(data) != ZIO_COMPRESS_OFF) ?
-           arc_get_compression(data) : ZIO_COMPRESS_INHERIT, &zp);
+       dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
        DB_DNODE_EXIT(db);
 
        /*
@@ -3812,7 +3923,6 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
                    ZIO_PRIORITY_ASYNC_WRITE,
                    ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_NODATA, &zb);
        } else {
-               arc_done_func_t *children_ready_cb = NULL;
                ASSERT(arc_released(data));
 
                /*
@@ -3820,6 +3930,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
                 * ready callback so that we can properly handle an indirect
                 * block that only contains holes.
                 */
+               arc_write_done_func_t *children_ready_cb = NULL;
                if (db->db_level != 0)
                        children_ready_cb = dbuf_write_children_ready;
 
@@ -3844,6 +3955,7 @@ EXPORT_SYMBOL(dbuf_free_range);
 EXPORT_SYMBOL(dbuf_new_size);
 EXPORT_SYMBOL(dbuf_release_bp);
 EXPORT_SYMBOL(dbuf_dirty);
+EXPORT_SYMBOL(dmu_buf_will_change_crypt_params);
 EXPORT_SYMBOL(dmu_buf_will_dirty);
 EXPORT_SYMBOL(dmu_buf_will_not_fill);
 EXPORT_SYMBOL(dmu_buf_will_fill);
@@ -3865,7 +3977,6 @@ EXPORT_SYMBOL(dbuf_sync_list);
 EXPORT_SYMBOL(dmu_buf_set_user);
 EXPORT_SYMBOL(dmu_buf_set_user_ie);
 EXPORT_SYMBOL(dmu_buf_get_user);
-EXPORT_SYMBOL(dmu_buf_freeable);
 EXPORT_SYMBOL(dmu_buf_get_blkptr);
 
 /* BEGIN CSTYLED */