Fixes for #6639
author    Tom Caputi <tcaputi@datto.com>
          Thu, 28 Sep 2017 15:49:13 +0000 (11:49 -0400)
committer Tom Caputi <tcaputi@datto.com>
          Wed, 11 Oct 2017 20:55:50 +0000 (16:55 -0400)
Several issues were uncovered by running stress tests with zfs
encryption, raw sends in particular. The issues and their
associated fixes are as follows:

* arc_read_done() can chain several requests for the same block
  of data via the arc_callback_t struct. In these cases, the ARC
  would use only the first request's dsobj from the bookmark to
  decrypt the data. This is problematic because the first request
  might be a prefetch zio that can tolerate the key not being
  loaded, while the second might use a different key that it
  knows will work. The fix is to pass the dsobj with each
  individual arc_callback_t so that each request can attempt to
  decrypt the data separately.
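
  A rough sketch of the resulting arc_read_done() loop (names are
  taken from the arc.c change below; the callback counting and
  error handling are elided):

      for (acb = callback_list; acb != NULL; acb = acb->acb_next) {
              /*
               * Each callback now carries its own acb_dsobj, so a
               * prefetch zio that cannot find its key no longer
               * decides the outcome for a later caller whose key
               * is loaded.
               */
              int error = arc_buf_alloc_impl(hdr, zio->io_spa,
                  acb->acb_dsobj, acb->acb_private, acb->acb_encrypted,
                  acb->acb_compressed, acb->acb_noauth, no_zio_error,
                  &acb->acb_buf);
      }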

* DRR_FREE and DRR_FREEOBJECTS records in a send file did not
  have their transactions tagged as raw when receiving a raw
  send stream, which caused a panic when the dbuf code attempted
  to decrypt these blocks.
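
  The core of the fix is to tag the dirty record produced by the
  free as raw; this excerpt is taken nearly verbatim from the
  dmu_free_long_range_impl() change in dmu.c below:

      dnode_free_range(dn, chunk_begin, chunk_len, tx);

      /* if this is a raw free, mark the dirty record as such */
      if (raw) {
              dbuf_dirty_record_t *dr = dn->dn_dbuf->db_last_dirty;

              while (dr != NULL && dr->dr_txg > tx->tx_txg)
                      dr = dr->dr_next;
              if (dr != NULL && dr->dr_txg == tx->tx_txg)
                      dr->dt.dl.dr_raw = B_TRUE;
      }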

* traverse_prefetch_metadata() did not properly set
  ZIO_FLAG_SPECULATIVE when issuing prefetch IOs.
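
  The fix is a one-line flag change in dmu_traverse.c (shown
  below); speculative zios are allowed to fail when an encryption
  key is not loaded rather than tripping the non-speculative
  asserts:

      arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
      int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;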

* A few asserts and code cleanups were added to make issues like
  these easier to detect in the future.
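
  For example, arc_read_done() now asserts that only speculative
  zios may fail to decrypt because of a missing key:

      /*
       * Assert non-speculative zios didn't fail because an
       * encryption key wasn't loaded
       */
      ASSERT((zio->io_flags & ZIO_FLAG_SPECULATIVE) || error == 0);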

Signed-off-by: Tom Caputi <tcaputi@datto.com>
cmd/ztest/ztest.c
include/sys/arc_impl.h
include/sys/dmu.h
module/zfs/arc.c
module/zfs/dbuf.c
module/zfs/dmu.c
module/zfs/dmu_send.c
module/zfs/dmu_traverse.c
module/zfs/zfs_vnops.c

diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c
index d397d7279ed1f0ca19b12af2c33f7895c6944cff..248d04fc4512253dd004cc41a4dded48a3b98bec 100644
@@ -1958,7 +1958,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
                dmu_write(os, lr->lr_foid, offset, length, data, tx);
        } else {
                bcopy(data, abuf->b_data, length);
-               dmu_assign_arcbuf(db, offset, abuf, tx);
+               dmu_assign_arcbuf_by_dbuf(db, offset, abuf, tx);
        }
 
        (void) ztest_log_write(zd, tx, lr);
@@ -4346,7 +4346,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
         *      bigobj, at the tail of the nth chunk
         *
         * The chunk size is set equal to bigobj block size so that
-        * dmu_assign_arcbuf() can be tested for object updates.
+        * dmu_assign_arcbuf_by_dbuf() can be tested for object updates.
         */
 
        /*
@@ -4408,7 +4408,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
                /*
                 * In iteration 5 (i == 5) use arcbufs
                 * that don't match bigobj blksz to test
-                * dmu_assign_arcbuf() when it can't directly
+                * dmu_assign_arcbuf_by_dbuf() when it can't directly
                 * assign an arcbuf to a dbuf.
                 */
                for (j = 0; j < s; j++) {
@@ -4454,8 +4454,8 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
 
                /*
                 * 50% of the time don't read objects in the 1st iteration to
-                * test dmu_assign_arcbuf() for the case when there're no
-                * existing dbufs for the specified offsets.
+                * test dmu_assign_arcbuf_by_dbuf() for the case when there are
+                * no existing dbufs for the specified offsets.
                 */
                if (i != 0 || ztest_random(2) != 0) {
                        error = dmu_read(os, packobj, packoff,
@@ -4500,12 +4500,12 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
                                    FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0);
                        }
                        if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
-                               dmu_assign_arcbuf(bonus_db, off,
+                               dmu_assign_arcbuf_by_dbuf(bonus_db, off,
                                    bigbuf_arcbufs[j], tx);
                        } else {
-                               dmu_assign_arcbuf(bonus_db, off,
+                               dmu_assign_arcbuf_by_dbuf(bonus_db, off,
                                    bigbuf_arcbufs[2 * j], tx);
-                               dmu_assign_arcbuf(bonus_db,
+                               dmu_assign_arcbuf_by_dbuf(bonus_db,
                                    off + chunksize / 2,
                                    bigbuf_arcbufs[2 * j + 1], tx);
                        }
diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h
index 36146858305654f1a8122cac0c68a6f079faf956..e39cf6a8ff490b065c57b2230372ae79cb1bef77 100644
@@ -96,6 +96,7 @@ struct arc_callback {
        boolean_t               acb_encrypted;
        boolean_t               acb_compressed;
        boolean_t               acb_noauth;
+       uint64_t                acb_dsobj;
        zio_t                   *acb_zio_dummy;
        arc_callback_t          *acb_next;
 };
diff --git a/include/sys/dmu.h b/include/sys/dmu.h
index 60778289e9b49ca92578425abe7e4b14d052ba5e..8a92919003ed16595decf0e11990e8583f3caada 100644
@@ -759,10 +759,13 @@ void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
  * -1, the range from offset to end-of-file is freed.
  */
 int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
-       uint64_t size, dmu_tx_t *tx);
+    uint64_t size, dmu_tx_t *tx);
 int dmu_free_long_range(objset_t *os, uint64_t object, uint64_t offset,
-       uint64_t size);
+    uint64_t size);
+int dmu_free_long_range_raw(objset_t *os, uint64_t object, uint64_t offset,
+    uint64_t size);
 int dmu_free_long_object(objset_t *os, uint64_t object);
+int dmu_free_long_object_raw(objset_t *os, uint64_t object);
 
 /*
  * Convenience functions.
@@ -797,10 +800,11 @@ int dmu_write_uio_dnode(dnode_t *dn, struct uio *uio, uint64_t size,
 #endif
 struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
 void dmu_return_arcbuf(struct arc_buf *buf);
-void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf,
-    dmu_tx_t *tx);
-void dmu_assign_arcbuf_impl(dmu_buf_t *handle, struct arc_buf *buf,
-    dmu_tx_t *tx);
+void dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset,
+    struct arc_buf *buf, dmu_tx_t *tx);
+void dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset,
+    struct arc_buf *buf, dmu_tx_t *tx);
+#define        dmu_assign_arcbuf       dmu_assign_arcbuf_by_dbuf
 void dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder,
     const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx);
 void dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
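
For reference, the receive path in dmu_send.c below chooses between
the plain and raw variants of these new prototypes like so:

      if (rwa->raw)
              err = dmu_free_long_object_raw(rwa->os, obj);
      else
              err = dmu_free_long_object(rwa->os, obj);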
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 1329e8e83c5b19a75cca40cee4cb0d3c47cc87b4..6256fed2b9754cb55ca78a5f6551879c22ebe10c 100644
@@ -3155,13 +3155,14 @@ arc_buf_destroy_impl(arc_buf_t *buf)
                        hdr->b_crypt_hdr.b_ebufcnt -= 1;
 
                /*
-                * if we have no more encrypted buffers and we've already
+                * If we have no more encrypted buffers and we've already
                 * gotten a copy of the decrypted data we can free b_rabd to
                 * save some space.
                 */
                if (hdr->b_crypt_hdr.b_ebufcnt == 0 && HDR_HAS_RABD(hdr) &&
-                   hdr->b_l1hdr.b_pabd != NULL)
+                   hdr->b_l1hdr.b_pabd != NULL && !HDR_IO_IN_PROGRESS(hdr)) {
                        arc_hdr_free_abd(hdr, B_TRUE);
+               }
        }
 
        arc_buf_t *lastbuf = arc_buf_remove(hdr, buf);
@@ -3716,9 +3717,8 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
                        arc_hdr_free_abd(hdr, B_FALSE);
                }
 
-               if (HDR_HAS_RABD(hdr)) {
+               if (HDR_HAS_RABD(hdr))
                        arc_hdr_free_abd(hdr, B_TRUE);
-               }
        }
 
        ASSERT3P(hdr->b_hash_next, ==, NULL);
@@ -5746,16 +5746,15 @@ arc_read_done(zio_t *zio)
                callback_cnt++;
 
                int error = arc_buf_alloc_impl(hdr, zio->io_spa,
-                   zio->io_bookmark.zb_objset, acb->acb_private,
-                   acb->acb_encrypted, acb->acb_compressed, acb->acb_noauth,
-                   no_zio_error, &acb->acb_buf);
+                   acb->acb_dsobj, acb->acb_private, acb->acb_encrypted,
+                   acb->acb_compressed, acb->acb_noauth, no_zio_error,
+                   &acb->acb_buf);
 
                /*
-                * assert non-speculative zios didn't fail because an
+                * Assert non-speculative zios didn't fail because an
                 * encryption key wasn't loaded
                 */
-               ASSERT((zio->io_flags & ZIO_FLAG_SPECULATIVE) ||
-                   error == 0 || error != ENOENT);
+               ASSERT((zio->io_flags & ZIO_FLAG_SPECULATIVE) || error == 0);
 
                /*
                 * If we failed to decrypt, report an error now (as the zio
@@ -5778,10 +5777,8 @@ arc_read_done(zio_t *zio)
        }
        hdr->b_l1hdr.b_acb = NULL;
        arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
-       if (callback_cnt == 0) {
-               ASSERT(HDR_PREFETCH(hdr) || HDR_HAS_RABD(hdr));
+       if (callback_cnt == 0)
                ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr));
-       }
 
        ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt) ||
            callback_list != NULL);
@@ -5943,6 +5940,9 @@ top:
                                acb->acb_done = done;
                                acb->acb_private = private;
                                acb->acb_compressed = compressed_read;
+                               acb->acb_encrypted = encrypted_read;
+                               acb->acb_noauth = noauth_read;
+                               acb->acb_dsobj = zb->zb_objset;
                                if (pio != NULL)
                                        acb->acb_zio_dummy = zio_null(pio,
                                            spa, NULL, NULL, NULL, zio_flags);
@@ -5981,9 +5981,7 @@ top:
                        rc = arc_buf_alloc_impl(hdr, spa, zb->zb_objset,
                            private, encrypted_read, compressed_read,
                            noauth_read, B_TRUE, &buf);
-
-                       ASSERT((zio_flags & ZIO_FLAG_SPECULATIVE) ||
-                           rc == 0 || rc != ENOENT);
+                       ASSERT((zio_flags & ZIO_FLAG_SPECULATIVE) || rc == 0);
                } else if (*arc_flags & ARC_FLAG_PREFETCH &&
                    refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) {
                        arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH);
@@ -6008,7 +6006,7 @@ top:
                uint64_t addr = 0;
                boolean_t devw = B_FALSE;
                uint64_t size;
-               void *hdr_abd;
+               abd_t *hdr_abd;
 
                /*
                 * Gracefully handle a damaged logical block size as a
@@ -6131,6 +6129,7 @@ top:
                acb->acb_compressed = compressed_read;
                acb->acb_encrypted = encrypted_read;
                acb->acb_noauth = noauth_read;
+               acb->acb_dsobj = zb->zb_objset;
 
                ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
                hdr->b_l1hdr.b_acb = acb;
@@ -7961,9 +7960,15 @@ l2arc_untransform(zio_t *zio, l2arc_read_callback_t *cb)
         */
        ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG);
        ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
+       ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
 
-       /* If the data was encrypted, decrypt it now */
-       if (HDR_ENCRYPTED(hdr)) {
+       /*
+        * If the data was encrypted, decrypt it now. Note that
+        * we must check the bp here and not the hdr, since the
+        * hdr does not have its encryption parameters updated
+        * until arc_read_done().
+        */
+       if (BP_IS_ENCRYPTED(bp)) {
                abd_t *eabd = arc_get_data_abd(hdr,
                    arc_hdr_size(hdr), hdr);
 
@@ -8089,7 +8094,16 @@ l2arc_read_done(zio_t *zio)
                 */
                abd_free(cb->l2rcb_abd);
                zio->io_size = zio->io_orig_size = arc_hdr_size(hdr);
-               zio->io_abd = zio->io_orig_abd = hdr->b_l1hdr.b_pabd;
+
+               if (BP_IS_ENCRYPTED(&cb->l2rcb_bp) &&
+                   (cb->l2rcb_flags & ZIO_FLAG_RAW_ENCRYPT)) {
+                       ASSERT(HDR_HAS_RABD(hdr));
+                       zio->io_abd = zio->io_orig_abd =
+                           hdr->b_crypt_hdr.b_rabd;
+               } else {
+                       ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+                       zio->io_abd = zio->io_orig_abd = hdr->b_l1hdr.b_pabd;
+               }
        }
 
        ASSERT3P(zio->io_abd, !=, NULL);
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 537f22011cd7817bcf0dc56d9e668de14959b6cb..c954dec1bedb6d64cba3f4029c1e423e90e0d7ec 100644
@@ -2153,6 +2153,13 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
 
        if (db->db_state == DB_CACHED &&
            refcount_count(&db->db_holds) - 1 > db->db_dirtycnt) {
+               /*
+                * In practice, we will never have a case where we have an
+                * encrypted arc buffer while additional holds exist on the
+                * dbuf. We don't handle this here so we simply assert that
+                * fact instead.
+                */
+               ASSERT(!arc_is_encrypted(buf));
                mutex_exit(&db->db_mtx);
                (void) dbuf_dirty(db, tx);
                bcopy(buf->b_data, db->db.db_data, db->db.db_size);
@@ -2168,6 +2175,8 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
                ASSERT(db->db_buf != NULL);
                if (dr != NULL && dr->dr_txg == tx->tx_txg) {
                        ASSERT(dr->dt.dl.dr_data == db->db_buf);
+                       IMPLY(arc_is_encrypted(buf), dr->dt.dl.dr_raw);
+
                        if (!arc_released(db->db_buf)) {
                                ASSERT(dr->dt.dl.dr_override_state ==
                                    DR_OVERRIDDEN);
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 1eb35b935174ab9a5c53357e29daacfe59e3b7af..1aba0b133e408c0a5131c00374c6912cc48d1274 100644
@@ -761,7 +761,7 @@ dmu_objset_zfs_unmounting(objset_t *os)
 
 static int
 dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
-    uint64_t length)
+    uint64_t length, boolean_t raw)
 {
        uint64_t object_size;
        int err;
@@ -844,6 +844,17 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
                    uint64_t, long_free_dirty_all_txgs, uint64_t, chunk_len,
                    uint64_t, dmu_tx_get_txg(tx));
                dnode_free_range(dn, chunk_begin, chunk_len, tx);
+
+               /* if this is a raw free, mark the dirty record as such */
+               if (raw) {
+                       dbuf_dirty_record_t *dr = dn->dn_dbuf->db_last_dirty;
+
+                       while (dr != NULL && dr->dr_txg > tx->tx_txg)
+                               dr = dr->dr_next;
+                       if (dr != NULL && dr->dr_txg == tx->tx_txg)
+                               dr->dt.dl.dr_raw = B_TRUE;
+               }
+
                dmu_tx_commit(tx);
 
                length -= chunk_len;
@@ -861,7 +872,7 @@ dmu_free_long_range(objset_t *os, uint64_t object,
        err = dnode_hold(os, object, FTAG, &dn);
        if (err != 0)
                return (err);
-       err = dmu_free_long_range_impl(os, dn, offset, length);
+       err = dmu_free_long_range_impl(os, dn, offset, length, B_FALSE);
 
        /*
         * It is important to zero out the maxblkid when freeing the entire
@@ -876,8 +887,37 @@ dmu_free_long_range(objset_t *os, uint64_t object,
        return (err);
 }
 
+/*
+ * This function is equivalent to dmu_free_long_range(), but also
+ * marks the new dirty record as a raw write.
+ */
 int
-dmu_free_long_object(objset_t *os, uint64_t object)
+dmu_free_long_range_raw(objset_t *os, uint64_t object,
+    uint64_t offset, uint64_t length)
+{
+       dnode_t *dn;
+       int err;
+
+       err = dnode_hold(os, object, FTAG, &dn);
+       if (err != 0)
+               return (err);
+       err = dmu_free_long_range_impl(os, dn, offset, length, B_TRUE);
+
+       /*
+        * It is important to zero out the maxblkid when freeing the entire
+        * file, so that (a) subsequent calls to dmu_free_long_range_impl()
+        * will take the fast path, and (b) dnode_reallocate() can verify
+        * that the entire file has been freed.
+        */
+       if (err == 0 && offset == 0 && length == DMU_OBJECT_END)
+               dn->dn_maxblkid = 0;
+
+       dnode_rele(dn, FTAG);
+       return (err);
+}
+
+static int
+dmu_free_long_object_impl(objset_t *os, uint64_t object, boolean_t raw)
 {
        dmu_tx_t *tx;
        int err;
@@ -893,6 +933,9 @@ dmu_free_long_object(objset_t *os, uint64_t object)
        err = dmu_tx_assign(tx, TXG_WAIT);
        if (err == 0) {
                err = dmu_object_free(os, object, tx);
+               if (err == 0 && raw)
+                       VERIFY0(dmu_object_dirty_raw(os, object, tx));
+
                dmu_tx_commit(tx);
        } else {
                dmu_tx_abort(tx);
@@ -901,6 +944,19 @@ dmu_free_long_object(objset_t *os, uint64_t object)
        return (err);
 }
 
+int
+dmu_free_long_object(objset_t *os, uint64_t object)
+{
+       return (dmu_free_long_object_impl(os, object, B_FALSE));
+}
+
+int
+dmu_free_long_object_raw(objset_t *os, uint64_t object)
+{
+       return (dmu_free_long_object_impl(os, object, B_TRUE));
+}
+
+
 int
 dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
     uint64_t size, dmu_tx_t *tx)
@@ -1486,13 +1542,6 @@ dmu_return_arcbuf(arc_buf_t *buf)
        arc_buf_destroy(buf, FTAG);
 }
 
-void
-dmu_assign_arcbuf_impl(dmu_buf_t *handle, arc_buf_t *buf, dmu_tx_t *tx)
-{
-       dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle;
-       dbuf_assign_arcbuf(db, buf, tx);
-}
-
 void
 dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder, const uint8_t *salt,
     const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx)
@@ -1569,22 +1618,19 @@ dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
  * dmu_write().
  */
 void
-dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
+dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf,
     dmu_tx_t *tx)
 {
-       dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle;
-       dnode_t *dn;
        dmu_buf_impl_t *db;
+       objset_t *os = dn->dn_objset;
+       uint64_t object = dn->dn_object;
        uint32_t blksz = (uint32_t)arc_buf_lsize(buf);
        uint64_t blkid;
 
-       DB_DNODE_ENTER(dbuf);
-       dn = DB_DNODE(dbuf);
        rw_enter(&dn->dn_struct_rwlock, RW_READER);
        blkid = dbuf_whichblock(dn, 0, offset);
        VERIFY((db = dbuf_hold(dn, blkid, FTAG)) != NULL);
        rw_exit(&dn->dn_struct_rwlock);
-       DB_DNODE_EXIT(dbuf);
 
        /*
         * We can only assign if the offset is aligned, the arc buf is the
@@ -1594,19 +1640,10 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
                dbuf_assign_arcbuf(db, buf, tx);
                dbuf_rele(db, FTAG);
        } else {
-               objset_t *os;
-               uint64_t object;
-
                /* compressed bufs must always be assignable to their dbuf */
                ASSERT3U(arc_get_compression(buf), ==, ZIO_COMPRESS_OFF);
                ASSERT(!(buf->b_flags & ARC_BUF_FLAG_COMPRESSED));
 
-               DB_DNODE_ENTER(dbuf);
-               dn = DB_DNODE(dbuf);
-               os = dn->dn_objset;
-               object = dn->dn_object;
-               DB_DNODE_EXIT(dbuf);
-
                dbuf_rele(db, FTAG);
                dmu_write(os, object, offset, blksz, buf->b_data, tx);
                dmu_return_arcbuf(buf);
@@ -1614,6 +1651,17 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
        }
 }
 
+void
+dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
+    dmu_tx_t *tx)
+{
+       dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle;
+
+       DB_DNODE_ENTER(dbuf);
+       dmu_assign_arcbuf_by_dnode(DB_DNODE(dbuf), offset, buf, tx);
+       DB_DNODE_EXIT(dbuf);
+}
+
 typedef struct {
        dbuf_dirty_record_t     *dsa_dr;
        dmu_sync_cb_t           *dsa_done;
@@ -2424,7 +2472,9 @@ EXPORT_SYMBOL(dmu_buf_rele_array);
 EXPORT_SYMBOL(dmu_prefetch);
 EXPORT_SYMBOL(dmu_free_range);
 EXPORT_SYMBOL(dmu_free_long_range);
+EXPORT_SYMBOL(dmu_free_long_range_raw);
 EXPORT_SYMBOL(dmu_free_long_object);
+EXPORT_SYMBOL(dmu_free_long_object_raw);
 EXPORT_SYMBOL(dmu_read);
 EXPORT_SYMBOL(dmu_read_by_dnode);
 EXPORT_SYMBOL(dmu_write);
@@ -2443,7 +2493,8 @@ EXPORT_SYMBOL(dmu_write_policy);
 EXPORT_SYMBOL(dmu_sync);
 EXPORT_SYMBOL(dmu_request_arcbuf);
 EXPORT_SYMBOL(dmu_return_arcbuf);
-EXPORT_SYMBOL(dmu_assign_arcbuf);
+EXPORT_SYMBOL(dmu_assign_arcbuf_by_dnode);
+EXPORT_SYMBOL(dmu_assign_arcbuf_by_dbuf);
 EXPORT_SYMBOL(dmu_buf_hold);
 EXPORT_SYMBOL(dmu_ot);
 
diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
index 235e832d74801af5019a96a27a1191d5f5caea71..4318a7815893c80006d0af8f6bfd4db190340f81 100644
@@ -2592,7 +2592,11 @@ receive_freeobjects(struct receive_writer_arg *rwa,
                else if (err != 0)
                        return (err);
 
-               err = dmu_free_long_object(rwa->os, obj);
+               if (rwa->raw)
+                       err = dmu_free_long_object_raw(rwa->os, obj);
+               else
+                       err = dmu_free_long_object(rwa->os, obj);
+
                if (err != 0)
                        return (err);
 
@@ -2608,9 +2612,9 @@ noinline static int
 receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
     arc_buf_t *abuf)
 {
-       dmu_tx_t *tx;
-       dmu_buf_t *bonus;
        int err;
+       dmu_tx_t *tx;
+       dnode_t *dn;
 
        if (drrw->drr_offset + drrw->drr_logical_size < drrw->drr_offset ||
            !DMU_OT_IS_VALID(drrw->drr_type))
@@ -2635,7 +2639,6 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
                return (SET_ERROR(EINVAL));
 
        tx = dmu_tx_create(rwa->os);
-
        dmu_tx_hold_write(tx, drrw->drr_object,
            drrw->drr_offset, drrw->drr_logical_size);
        err = dmu_tx_assign(tx, TXG_WAIT);
@@ -2655,10 +2658,9 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
                    DRR_WRITE_PAYLOAD_SIZE(drrw));
        }
 
-       /* use the bonus buf to look up the dnode in dmu_assign_arcbuf */
-       if (dmu_bonus_hold(rwa->os, drrw->drr_object, FTAG, &bonus) != 0)
-               return (SET_ERROR(EINVAL));
-       dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx);
+       VERIFY0(dnode_hold(rwa->os, drrw->drr_object, FTAG, &dn));
+       dmu_assign_arcbuf_by_dnode(dn, drrw->drr_offset, abuf, tx);
+       dnode_rele(dn, FTAG);
 
        /*
         * Note: If the receive fails, we want the resume stream to start
@@ -2668,7 +2670,6 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
         */
        save_resume_state(rwa, drrw->drr_object, drrw->drr_offset, tx);
        dmu_tx_commit(tx);
-       dmu_buf_rele(bonus, FTAG);
 
        return (0);
 }
@@ -2767,6 +2768,8 @@ receive_write_embedded(struct receive_writer_arg *rwa,
                return (SET_ERROR(EINVAL));
        if (drrwe->drr_compression >= ZIO_COMPRESS_FUNCTIONS)
                return (SET_ERROR(EINVAL));
+       if (rwa->raw)
+               return (SET_ERROR(EINVAL));
 
        if (drrwe->drr_object > rwa->max_object)
                rwa->max_object = drrwe->drr_object;
@@ -2841,7 +2844,7 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
        if (db_spill->db_size < drrs->drr_length)
                VERIFY(0 == dbuf_spill_set_blksz(db_spill,
                    drrs->drr_length, tx));
-       dmu_assign_arcbuf_impl(db_spill, abuf, tx);
+       dbuf_assign_arcbuf((dmu_buf_impl_t *)db_spill, abuf, tx);
 
        dmu_buf_rele(db, FTAG);
        dmu_buf_rele(db_spill, FTAG);
@@ -2866,8 +2869,13 @@ receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf)
        if (drrf->drr_object > rwa->max_object)
                rwa->max_object = drrf->drr_object;
 
-       err = dmu_free_long_range(rwa->os, drrf->drr_object,
-           drrf->drr_offset, drrf->drr_length);
+       if (rwa->raw) {
+               err = dmu_free_long_range_raw(rwa->os, drrf->drr_object,
+                   drrf->drr_offset, drrf->drr_length);
+       } else {
+               err = dmu_free_long_range(rwa->os, drrf->drr_object,
+                   drrf->drr_offset, drrf->drr_length);
+       }
 
        return (err);
 }
diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c
index a6c27b4be29f549f45803384573d560fd2bb9b4e..268d82ce4bd4345c4b8a2354e9a5de81aa97a1cb 100644
@@ -181,7 +181,7 @@ traverse_prefetch_metadata(traverse_data_t *td,
     const blkptr_t *bp, const zbookmark_phys_t *zb)
 {
        arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
-       int zio_flags = ZIO_FLAG_CANFAIL;
+       int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
 
        if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA))
                return;
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index 62241a46b1a65d0e3468027698046bcb822ef24f..85dd4a049a7d5e246bd2e092949b8b0878cea771 100644
@@ -841,8 +841,8 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
                                xuio_stat_wbuf_copied();
                        } else {
                                ASSERT(xuio || tx_bytes == max_blksz);
-                               dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl),
-                                   woff, abuf, tx);
+                               dmu_assign_arcbuf_by_dbuf(
+                                   sa_get_db(zp->z_sa_hdl), woff, abuf, tx);
                        }
                        ASSERT(tx_bytes <= uio->uio_resid);
                        uioskip(uio, tx_bytes);