From 440a3eb939441a42ab5029e5e64498d802fa276b Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Thu, 28 Sep 2017 11:49:13 -0400 Subject: [PATCH] Fixes for #6639 Several issues were uncovered by running stress tests with zfs encryption and raw sends in particular. The issues and their associated fixes are as follows: * arc_read_done() has the ability to chain several requests for the same block of data via the arc_callback_t struct. In these cases, the ARC would only use the first request's dsobj from the bookmark to decrypt the data. This is problematic because the first request might be a prefetch zio which is able to handle the key not being loaded, while the second might use a different key that it is sure will work. The fix here is to pass the dsobj with each individual arc_callback_t so that each request can attempt to decrypt the data separately. * DRR_FREE and DRR_FREEOBJECT records in a send file were not having their transactions properly tagged as raw during raw sends, which caused a panic when the dbuf code attempted to decrypt these blocks. * traverse_prefetch_metadata() did not properly set ZIO_FLAG_SPECULATIVE when issuing prefetch IOs. * Added a few asserts and code cleanups to ensure these issues are more detectable in the future. Signed-off-by: Tom Caputi --- cmd/ztest/ztest.c | 16 +++--- include/sys/arc_impl.h | 1 + include/sys/dmu.h | 16 +++--- module/zfs/arc.c | 54 ++++++++++++-------- module/zfs/dbuf.c | 9 ++++ module/zfs/dmu.c | 103 ++++++++++++++++++++++++++++---------- module/zfs/dmu_send.c | 32 +++++++----- module/zfs/dmu_traverse.c | 2 +- module/zfs/zfs_vnops.c | 4 +- 9 files changed, 162 insertions(+), 75 deletions(-) diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index d397d7279..248d04fc4 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -1958,7 +1958,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap) dmu_write(os, lr->lr_foid, offset, length, data, tx); } else { bcopy(data, abuf->b_data, length); - dmu_assign_arcbuf(db, offset, abuf, tx); + dmu_assign_arcbuf_by_dbuf(db, offset, abuf, tx); } (void) ztest_log_write(zd, tx, lr); @@ -4346,7 +4346,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) * bigobj, at the tail of the nth chunk * * The chunk size is set equal to bigobj block size so that - * dmu_assign_arcbuf() can be tested for object updates. + * dmu_assign_arcbuf_by_dbuf() can be tested for object updates. */ /* @@ -4408,7 +4408,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) /* * In iteration 5 (i == 5) use arcbufs * that don't match bigobj blksz to test - * dmu_assign_arcbuf() when it can't directly + * dmu_assign_arcbuf_by_dbuf() when it can't directly * assign an arcbuf to a dbuf. */ for (j = 0; j < s; j++) { @@ -4454,8 +4454,8 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) /* * 50% of the time don't read objects in the 1st iteration to - * test dmu_assign_arcbuf() for the case when there're no - * existing dbufs for the specified offsets. + * test dmu_assign_arcbuf_by_dbuf() for the case when there are + * no existing dbufs for the specified offsets. 
*/ if (i != 0 || ztest_random(2) != 0) { error = dmu_read(os, packobj, packoff, @@ -4500,12 +4500,12 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0); } if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) { - dmu_assign_arcbuf(bonus_db, off, + dmu_assign_arcbuf_by_dbuf(bonus_db, off, bigbuf_arcbufs[j], tx); } else { - dmu_assign_arcbuf(bonus_db, off, + dmu_assign_arcbuf_by_dbuf(bonus_db, off, bigbuf_arcbufs[2 * j], tx); - dmu_assign_arcbuf(bonus_db, + dmu_assign_arcbuf_by_dbuf(bonus_db, off + chunksize / 2, bigbuf_arcbufs[2 * j + 1], tx); } diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h index 361468583..e39cf6a8f 100644 --- a/include/sys/arc_impl.h +++ b/include/sys/arc_impl.h @@ -96,6 +96,7 @@ struct arc_callback { boolean_t acb_encrypted; boolean_t acb_compressed; boolean_t acb_noauth; + uint64_t acb_dsobj; zio_t *acb_zio_dummy; arc_callback_t *acb_next; }; diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 60778289e..8a9291900 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -759,10 +759,13 @@ void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func, * -1, the range from offset to end-of-file is freed. */ int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset, - uint64_t size, dmu_tx_t *tx); + uint64_t size, dmu_tx_t *tx); int dmu_free_long_range(objset_t *os, uint64_t object, uint64_t offset, - uint64_t size); + uint64_t size); +int dmu_free_long_range_raw(objset_t *os, uint64_t object, uint64_t offset, + uint64_t size); int dmu_free_long_object(objset_t *os, uint64_t object); +int dmu_free_long_object_raw(objset_t *os, uint64_t object); /* * Convenience functions. @@ -797,10 +800,11 @@ int dmu_write_uio_dnode(dnode_t *dn, struct uio *uio, uint64_t size, #endif struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size); void dmu_return_arcbuf(struct arc_buf *buf); -void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf, - dmu_tx_t *tx); -void dmu_assign_arcbuf_impl(dmu_buf_t *handle, struct arc_buf *buf, - dmu_tx_t *tx); +void dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, + struct arc_buf *buf, dmu_tx_t *tx); +void dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset, + struct arc_buf *buf, dmu_tx_t *tx); +#define dmu_assign_arcbuf dmu_assign_arcbuf_by_dbuf void dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder, const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx); void dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset, diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 1329e8e83..6256fed2b 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -3155,13 +3155,14 @@ arc_buf_destroy_impl(arc_buf_t *buf) hdr->b_crypt_hdr.b_ebufcnt -= 1; /* - * if we have no more encrypted buffers and we've already + * If we have no more encrypted buffers and we've already * gotten a copy of the decrypted data we can free b_rabd to * save some space. 
*/ if (hdr->b_crypt_hdr.b_ebufcnt == 0 && HDR_HAS_RABD(hdr) && - hdr->b_l1hdr.b_pabd != NULL) + hdr->b_l1hdr.b_pabd != NULL && !HDR_IO_IN_PROGRESS(hdr)) { arc_hdr_free_abd(hdr, B_TRUE); + } } arc_buf_t *lastbuf = arc_buf_remove(hdr, buf); @@ -3716,9 +3717,8 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr) arc_hdr_free_abd(hdr, B_FALSE); } - if (HDR_HAS_RABD(hdr)) { + if (HDR_HAS_RABD(hdr)) arc_hdr_free_abd(hdr, B_TRUE); - } } ASSERT3P(hdr->b_hash_next, ==, NULL); @@ -5746,16 +5746,15 @@ arc_read_done(zio_t *zio) callback_cnt++; int error = arc_buf_alloc_impl(hdr, zio->io_spa, - zio->io_bookmark.zb_objset, acb->acb_private, - acb->acb_encrypted, acb->acb_compressed, acb->acb_noauth, - no_zio_error, &acb->acb_buf); + acb->acb_dsobj, acb->acb_private, acb->acb_encrypted, + acb->acb_compressed, acb->acb_noauth, no_zio_error, + &acb->acb_buf); /* - * assert non-speculative zios didn't fail because an + * Assert non-speculative zios didn't fail because an * encryption key wasn't loaded */ - ASSERT((zio->io_flags & ZIO_FLAG_SPECULATIVE) || - error == 0 || error != ENOENT); + ASSERT((zio->io_flags & ZIO_FLAG_SPECULATIVE) || error == 0); /* * If we failed to decrypt, report an error now (as the zio @@ -5778,10 +5777,8 @@ arc_read_done(zio_t *zio) } hdr->b_l1hdr.b_acb = NULL; arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS); - if (callback_cnt == 0) { - ASSERT(HDR_PREFETCH(hdr) || HDR_HAS_RABD(hdr)); + if (callback_cnt == 0) ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr)); - } ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt) || callback_list != NULL); @@ -5943,6 +5940,9 @@ top: acb->acb_done = done; acb->acb_private = private; acb->acb_compressed = compressed_read; + acb->acb_encrypted = encrypted_read; + acb->acb_noauth = noauth_read; + acb->acb_dsobj = zb->zb_objset; if (pio != NULL) acb->acb_zio_dummy = zio_null(pio, spa, NULL, NULL, NULL, zio_flags); @@ -5981,9 +5981,7 @@ top: rc = arc_buf_alloc_impl(hdr, spa, zb->zb_objset, private, encrypted_read, compressed_read, noauth_read, B_TRUE, &buf); - - ASSERT((zio_flags & ZIO_FLAG_SPECULATIVE) || - rc == 0 || rc != ENOENT); + ASSERT((zio_flags & ZIO_FLAG_SPECULATIVE) || rc == 0); } else if (*arc_flags & ARC_FLAG_PREFETCH && refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) { arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH); @@ -6008,7 +6006,7 @@ top: uint64_t addr = 0; boolean_t devw = B_FALSE; uint64_t size; - void *hdr_abd; + abd_t *hdr_abd; /* * Gracefully handle a damaged logical block size as a @@ -6131,6 +6129,7 @@ top: acb->acb_compressed = compressed_read; acb->acb_encrypted = encrypted_read; acb->acb_noauth = noauth_read; + acb->acb_dsobj = zb->zb_objset; ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL); hdr->b_l1hdr.b_acb = acb; @@ -7961,9 +7960,15 @@ l2arc_untransform(zio_t *zio, l2arc_read_callback_t *cb) */ ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG); ASSERT(MUTEX_HELD(HDR_LOCK(hdr))); + ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); - /* If the data was encrypted, decrypt it now */ - if (HDR_ENCRYPTED(hdr)) { + /* + * If the data was encrypted, decrypt it now. Note that + * we must check the bp here and not the hdr, since the + * hdr does not have its encryption parameters updated + * until arc_read_done(). 
+ */ + if (BP_IS_ENCRYPTED(bp)) { abd_t *eabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr); @@ -8089,7 +8094,16 @@ l2arc_read_done(zio_t *zio) */ abd_free(cb->l2rcb_abd); zio->io_size = zio->io_orig_size = arc_hdr_size(hdr); - zio->io_abd = zio->io_orig_abd = hdr->b_l1hdr.b_pabd; + + if (BP_IS_ENCRYPTED(&cb->l2rcb_bp) && + (cb->l2rcb_flags & ZIO_FLAG_RAW_ENCRYPT)) { + ASSERT(HDR_HAS_RABD(hdr)); + zio->io_abd = zio->io_orig_abd = + hdr->b_crypt_hdr.b_rabd; + } else { + ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + zio->io_abd = zio->io_orig_abd = hdr->b_l1hdr.b_pabd; + } } ASSERT3P(zio->io_abd, !=, NULL); diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 537f22011..c954dec1b 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -2153,6 +2153,13 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) if (db->db_state == DB_CACHED && refcount_count(&db->db_holds) - 1 > db->db_dirtycnt) { + /* + * In practice, we will never have a case where we have an + * encrypted arc buffer while additional holds exist on the + * dbuf. We don't handle this here so we simply assert that + * fact instead. + */ + ASSERT(!arc_is_encrypted(buf)); mutex_exit(&db->db_mtx); (void) dbuf_dirty(db, tx); bcopy(buf->b_data, db->db.db_data, db->db.db_size); @@ -2168,6 +2175,8 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) ASSERT(db->db_buf != NULL); if (dr != NULL && dr->dr_txg == tx->tx_txg) { ASSERT(dr->dt.dl.dr_data == db->db_buf); + IMPLY(arc_is_encrypted(buf), dr->dt.dl.dr_raw); + if (!arc_released(db->db_buf)) { ASSERT(dr->dt.dl.dr_override_state == DR_OVERRIDDEN); diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 1eb35b935..1aba0b133 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -761,7 +761,7 @@ dmu_objset_zfs_unmounting(objset_t *os) static int dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset, - uint64_t length) + uint64_t length, boolean_t raw) { uint64_t object_size; int err; @@ -844,6 +844,17 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset, uint64_t, long_free_dirty_all_txgs, uint64_t, chunk_len, uint64_t, dmu_tx_get_txg(tx)); dnode_free_range(dn, chunk_begin, chunk_len, tx); + + /* if this is a raw free, mark the dirty record as such */ + if (raw) { + dbuf_dirty_record_t *dr = dn->dn_dbuf->db_last_dirty; + + while (dr != NULL && dr->dr_txg > tx->tx_txg) + dr = dr->dr_next; + if (dr != NULL && dr->dr_txg == tx->tx_txg) + dr->dt.dl.dr_raw = B_TRUE; + } + dmu_tx_commit(tx); length -= chunk_len; @@ -861,7 +872,7 @@ dmu_free_long_range(objset_t *os, uint64_t object, err = dnode_hold(os, object, FTAG, &dn); if (err != 0) return (err); - err = dmu_free_long_range_impl(os, dn, offset, length); + err = dmu_free_long_range_impl(os, dn, offset, length, B_FALSE); /* * It is important to zero out the maxblkid when freeing the entire @@ -876,8 +887,37 @@ dmu_free_long_range(objset_t *os, uint64_t object, return (err); } +/* + * This function is equivalent to dmu_free_long_range(), but also + * marks the new dirty record as a raw write. 
+ */ int -dmu_free_long_object(objset_t *os, uint64_t object) +dmu_free_long_range_raw(objset_t *os, uint64_t object, + uint64_t offset, uint64_t length) +{ + dnode_t *dn; + int err; + + err = dnode_hold(os, object, FTAG, &dn); + if (err != 0) + return (err); + err = dmu_free_long_range_impl(os, dn, offset, length, B_TRUE); + + /* + * It is important to zero out the maxblkid when freeing the entire + * file, so that (a) subsequent calls to dmu_free_long_range_impl() + * will take the fast path, and (b) dnode_reallocate() can verify + * that the entire file has been freed. + */ + if (err == 0 && offset == 0 && length == DMU_OBJECT_END) + dn->dn_maxblkid = 0; + + dnode_rele(dn, FTAG); + return (err); +} + +static int +dmu_free_long_object_impl(objset_t *os, uint64_t object, boolean_t raw) { dmu_tx_t *tx; int err; @@ -893,6 +933,9 @@ dmu_free_long_object(objset_t *os, uint64_t object) err = dmu_tx_assign(tx, TXG_WAIT); if (err == 0) { err = dmu_object_free(os, object, tx); + if (err == 0 && raw) + VERIFY0(dmu_object_dirty_raw(os, object, tx)); + dmu_tx_commit(tx); } else { dmu_tx_abort(tx); @@ -901,6 +944,19 @@ dmu_free_long_object(objset_t *os, uint64_t object) return (err); } +int +dmu_free_long_object(objset_t *os, uint64_t object) +{ + return (dmu_free_long_object_impl(os, object, B_FALSE)); +} + +int +dmu_free_long_object_raw(objset_t *os, uint64_t object) +{ + return (dmu_free_long_object_impl(os, object, B_TRUE)); +} + + int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, dmu_tx_t *tx) @@ -1486,13 +1542,6 @@ dmu_return_arcbuf(arc_buf_t *buf) arc_buf_destroy(buf, FTAG); } -void -dmu_assign_arcbuf_impl(dmu_buf_t *handle, arc_buf_t *buf, dmu_tx_t *tx) -{ - dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle; - dbuf_assign_arcbuf(db, buf, tx); -} - void dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder, const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx) @@ -1569,22 +1618,19 @@ dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset, * dmu_write(). 
*/ void -dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf, +dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf, dmu_tx_t *tx) { - dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle; - dnode_t *dn; dmu_buf_impl_t *db; + objset_t *os = dn->dn_objset; + uint64_t object = dn->dn_object; uint32_t blksz = (uint32_t)arc_buf_lsize(buf); uint64_t blkid; - DB_DNODE_ENTER(dbuf); - dn = DB_DNODE(dbuf); rw_enter(&dn->dn_struct_rwlock, RW_READER); blkid = dbuf_whichblock(dn, 0, offset); VERIFY((db = dbuf_hold(dn, blkid, FTAG)) != NULL); rw_exit(&dn->dn_struct_rwlock); - DB_DNODE_EXIT(dbuf); /* * We can only assign if the offset is aligned, the arc buf is the @@ -1594,19 +1640,10 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf, dbuf_assign_arcbuf(db, buf, tx); dbuf_rele(db, FTAG); } else { - objset_t *os; - uint64_t object; - /* compressed bufs must always be assignable to their dbuf */ ASSERT3U(arc_get_compression(buf), ==, ZIO_COMPRESS_OFF); ASSERT(!(buf->b_flags & ARC_BUF_FLAG_COMPRESSED)); - DB_DNODE_ENTER(dbuf); - dn = DB_DNODE(dbuf); - os = dn->dn_objset; - object = dn->dn_object; - DB_DNODE_EXIT(dbuf); - dbuf_rele(db, FTAG); dmu_write(os, object, offset, blksz, buf->b_data, tx); dmu_return_arcbuf(buf); @@ -1614,6 +1651,17 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf, } } +void +dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf, + dmu_tx_t *tx) +{ + dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle; + + DB_DNODE_ENTER(dbuf); + dmu_assign_arcbuf_by_dnode(DB_DNODE(dbuf), offset, buf, tx); + DB_DNODE_EXIT(dbuf); +} + typedef struct { dbuf_dirty_record_t *dsa_dr; dmu_sync_cb_t *dsa_done; @@ -2424,7 +2472,9 @@ EXPORT_SYMBOL(dmu_buf_rele_array); EXPORT_SYMBOL(dmu_prefetch); EXPORT_SYMBOL(dmu_free_range); EXPORT_SYMBOL(dmu_free_long_range); +EXPORT_SYMBOL(dmu_free_long_range_raw); EXPORT_SYMBOL(dmu_free_long_object); +EXPORT_SYMBOL(dmu_free_long_object_raw); EXPORT_SYMBOL(dmu_read); EXPORT_SYMBOL(dmu_read_by_dnode); EXPORT_SYMBOL(dmu_write); @@ -2443,7 +2493,8 @@ EXPORT_SYMBOL(dmu_write_policy); EXPORT_SYMBOL(dmu_sync); EXPORT_SYMBOL(dmu_request_arcbuf); EXPORT_SYMBOL(dmu_return_arcbuf); -EXPORT_SYMBOL(dmu_assign_arcbuf); +EXPORT_SYMBOL(dmu_assign_arcbuf_by_dnode); +EXPORT_SYMBOL(dmu_assign_arcbuf_by_dbuf); EXPORT_SYMBOL(dmu_buf_hold); EXPORT_SYMBOL(dmu_ot); diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index 235e832d7..4318a7815 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -2592,7 +2592,11 @@ receive_freeobjects(struct receive_writer_arg *rwa, else if (err != 0) return (err); - err = dmu_free_long_object(rwa->os, obj); + if (rwa->raw) + err = dmu_free_long_object_raw(rwa->os, obj); + else + err = dmu_free_long_object(rwa->os, obj); + if (err != 0) return (err); @@ -2608,9 +2612,9 @@ noinline static int receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw, arc_buf_t *abuf) { - dmu_tx_t *tx; - dmu_buf_t *bonus; int err; + dmu_tx_t *tx; + dnode_t *dn; if (drrw->drr_offset + drrw->drr_logical_size < drrw->drr_offset || !DMU_OT_IS_VALID(drrw->drr_type)) @@ -2635,7 +2639,6 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw, return (SET_ERROR(EINVAL)); tx = dmu_tx_create(rwa->os); - dmu_tx_hold_write(tx, drrw->drr_object, drrw->drr_offset, drrw->drr_logical_size); err = dmu_tx_assign(tx, TXG_WAIT); @@ -2655,10 +2658,9 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw, DRR_WRITE_PAYLOAD_SIZE(drrw)); } - /* 
use the bonus buf to look up the dnode in dmu_assign_arcbuf */ - if (dmu_bonus_hold(rwa->os, drrw->drr_object, FTAG, &bonus) != 0) - return (SET_ERROR(EINVAL)); - dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx); + VERIFY0(dnode_hold(rwa->os, drrw->drr_object, FTAG, &dn)); + dmu_assign_arcbuf_by_dnode(dn, drrw->drr_offset, abuf, tx); + dnode_rele(dn, FTAG); /* * Note: If the receive fails, we want the resume stream to start @@ -2668,7 +2670,6 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw, */ save_resume_state(rwa, drrw->drr_object, drrw->drr_offset, tx); dmu_tx_commit(tx); - dmu_buf_rele(bonus, FTAG); return (0); } @@ -2767,6 +2768,8 @@ receive_write_embedded(struct receive_writer_arg *rwa, return (SET_ERROR(EINVAL)); if (drrwe->drr_compression >= ZIO_COMPRESS_FUNCTIONS) return (SET_ERROR(EINVAL)); + if (rwa->raw) + return (SET_ERROR(EINVAL)); if (drrwe->drr_object > rwa->max_object) rwa->max_object = drrwe->drr_object; @@ -2841,7 +2844,7 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs, if (db_spill->db_size < drrs->drr_length) VERIFY(0 == dbuf_spill_set_blksz(db_spill, drrs->drr_length, tx)); - dmu_assign_arcbuf_impl(db_spill, abuf, tx); + dbuf_assign_arcbuf((dmu_buf_impl_t *)db_spill, abuf, tx); dmu_buf_rele(db, FTAG); dmu_buf_rele(db_spill, FTAG); @@ -2866,8 +2869,13 @@ receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf) if (drrf->drr_object > rwa->max_object) rwa->max_object = drrf->drr_object; - err = dmu_free_long_range(rwa->os, drrf->drr_object, - drrf->drr_offset, drrf->drr_length); + if (rwa->raw) { + err = dmu_free_long_range_raw(rwa->os, drrf->drr_object, + drrf->drr_offset, drrf->drr_length); + } else { + err = dmu_free_long_range(rwa->os, drrf->drr_object, + drrf->drr_offset, drrf->drr_length); + } return (err); } diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c index a6c27b4be..268d82ce4 100644 --- a/module/zfs/dmu_traverse.c +++ b/module/zfs/dmu_traverse.c @@ -181,7 +181,7 @@ traverse_prefetch_metadata(traverse_data_t *td, const blkptr_t *bp, const zbookmark_phys_t *zb) { arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH; - int zio_flags = ZIO_FLAG_CANFAIL; + int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE; if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA)) return; diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 62241a46b..85dd4a049 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -841,8 +841,8 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) xuio_stat_wbuf_copied(); } else { ASSERT(xuio || tx_bytes == max_blksz); - dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), - woff, abuf, tx); + dmu_assign_arcbuf_by_dbuf( + sa_get_db(zp->z_sa_hdl), woff, abuf, tx); } ASSERT(tx_bytes <= uio->uio_resid); uioskip(uio, tx_bytes); -- 2.39.2
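
For reference, the sketch below is not part of the patch; it condenses the receive-side calling convention the diff establishes: raw streams route frees through the new `_raw` variants so the resulting dirty records are tagged raw, and writes hold the dnode directly via dmu_assign_arcbuf_by_dnode() instead of going through the bonus buffer. The helper names are illustrative only; the DMU functions and their signatures are the ones added or renamed by this change.

```c
/*
 * Illustrative sketch only (not part of the patch). The helper names
 * below are hypothetical; the DMU calls match the signatures declared
 * in include/sys/dmu.h by this change.
 */
static int
receive_free_dispatch(objset_t *os, uint64_t object, uint64_t offset,
    uint64_t length, boolean_t raw)
{
	/*
	 * Raw streams must use the _raw variant so the dirty record is
	 * tagged raw and the dbuf layer does not try to decrypt it.
	 */
	if (raw)
		return (dmu_free_long_range_raw(os, object, offset, length));

	return (dmu_free_long_range(os, object, offset, length));
}

static int
receive_assign_dispatch(objset_t *os, uint64_t object, uint64_t offset,
    arc_buf_t *abuf, dmu_tx_t *tx)
{
	dnode_t *dn;
	int err;

	/*
	 * Hold the dnode directly instead of holding the bonus buffer,
	 * matching the new dmu_assign_arcbuf_by_dnode() interface used
	 * by receive_write() above.
	 */
	err = dnode_hold(os, object, FTAG, &dn);
	if (err != 0)
		return (err);

	dmu_assign_arcbuf_by_dnode(dn, offset, abuf, tx);
	dnode_rele(dn, FTAG);

	return (0);
}
```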