]> git.proxmox.com Git - mirror_zfs.git/blobdiff - module/zfs/dmu_recv.c
Fix typo/etc in module/zfs/zfs_ctldir.c
[mirror_zfs.git] / module / zfs / dmu_recv.c
index e05b5ad821f0ec81299a057babd16addd88a43a1..fc5d47f5febbdd2942c62996d02ff4b18fc48d1e 100644 (file)
@@ -72,7 +72,6 @@ typedef struct dmu_recv_begin_arg {
        dmu_recv_cookie_t *drba_cookie;
        cred_t *drba_cred;
        dsl_crypto_params_t *drba_dcp;
-       uint64_t drba_snapobj;
 } dmu_recv_begin_arg_t;
 
 static int
@@ -128,7 +127,7 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
                dsl_dataset_t *snap;
                uint64_t obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
 
-               /* Can't perform a raw receive on top of a non-raw receive */
+               /* Can't raw receive on top of an unencrypted dataset */
                if (!encrypted && raw)
                        return (SET_ERROR(EINVAL));
 
@@ -155,7 +154,7 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
                        return (SET_ERROR(ENODEV));
 
                if (drba->drba_cookie->drc_force) {
-                       drba->drba_snapobj = obj;
+                       drba->drba_cookie->drc_fromsnapobj = obj;
                } else {
                        /*
                         * If we are not forcing, there must be no
@@ -165,7 +164,8 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
                                dsl_dataset_rele(snap, FTAG);
                                return (SET_ERROR(ETXTBSY));
                        }
-                       drba->drba_snapobj = ds->ds_prev->ds_object;
+                       drba->drba_cookie->drc_fromsnapobj =
+                           ds->ds_prev->ds_object;
                }
 
                dsl_dataset_rele(snap, FTAG);
@@ -200,7 +200,7 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
                                return (SET_ERROR(EINVAL));
                }
 
-               drba->drba_snapobj = 0;
+               drba->drba_cookie->drc_fromsnapobj = 0;
        }
 
        return (0);
@@ -440,7 +440,7 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
         * the raw cmd set. Raw incremental recvs do not use a dcp
         * since the encryption parameters are already set in stone.
         */
-       if (dcp == NULL && drba->drba_snapobj == 0 &&
+       if (dcp == NULL && drba->drba_cookie->drc_fromsnapobj == 0 &&
            drba->drba_origin == NULL) {
                ASSERT3P(dcp, ==, NULL);
                dcp = &dummy_dcp;
@@ -454,15 +454,15 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
                /* create temporary clone */
                dsl_dataset_t *snap = NULL;
 
-               if (drba->drba_snapobj != 0) {
+               if (drba->drba_cookie->drc_fromsnapobj != 0) {
                        VERIFY0(dsl_dataset_hold_obj(dp,
-                           drba->drba_snapobj, FTAG, &snap));
+                           drba->drba_cookie->drc_fromsnapobj, FTAG, &snap));
                        ASSERT3P(dcp, ==, NULL);
                }
 
                dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name,
                    snap, crflags, drba->drba_cred, dcp, tx);
-               if (drba->drba_snapobj != 0)
+               if (drba->drba_cookie->drc_fromsnapobj != 0)
                        dsl_dataset_rele(snap, FTAG);
                dsl_dataset_rele_flags(ds, dsflags, FTAG);
        } else {
@@ -1176,13 +1176,26 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
                    1ULL << drro->drr_indblkshift : 0;
                int nblkptr = deduce_nblkptr(drro->drr_bonustype,
                    drro->drr_bonuslen);
+               boolean_t did_free = B_FALSE;
 
                object = drro->drr_object;
 
-               /* nblkptr will be bounded by the bonus size and type */
+               /* nblkptr should be bounded by the bonus size and type */
                if (rwa->raw && nblkptr != drro->drr_nblkptr)
                        return (SET_ERROR(EINVAL));
 
+               /*
+                * Check for indicators that the object was freed and
+                * reallocated. For all sends, these indicators are:
+                *     - A changed block size
+                *     - A smaller nblkptr
+                *     - A changed dnode size
+                * For raw sends we also check a few other fields to
+                * ensure we are preserving the objset structure exactly
+                * as it was on the receive side:
+                *     - A changed indirect block size
+                *     - A smaller nlevels
+                */
                if (drro->drr_blksz != doi.doi_data_block_size ||
                    nblkptr < doi.doi_nblkptr ||
                    dn_slots != doi.doi_dnodesize >> DNODE_SHIFT ||
@@ -1193,17 +1206,20 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
                            drro->drr_object, 0, DMU_OBJECT_END);
                        if (err != 0)
                                return (SET_ERROR(EINVAL));
+                       else
+                               did_free = B_TRUE;
                }
 
                /*
                 * The dmu does not currently support decreasing nlevels
-                * on an object. For non-raw sends, this does not matter
-                * and the new object can just use the previous one's nlevels.
-                * For raw sends, however, the structure of the received dnode
-                * (including nlevels) must match that of the send side.
-                * Therefore, instead of using dmu_object_reclaim(), we must
-                * free the object completely and call dmu_object_claim_dnsize()
-                * instead.
+                * or changing the number of dnode slots on an object. For
+                * non-raw sends, this does not matter and the new object
+                * can just use the previous one's nlevels. For raw sends,
+                * however, the structure of the received dnode (including
+                * nlevels and dnode slots) must match that of the send
+                * side. Therefore, instead of using dmu_object_reclaim(),
+                * we must free the object completely and call
+                * dmu_object_claim_dnsize() instead.
                 */
                if ((rwa->raw && drro->drr_nlevels < doi.doi_indirection) ||
                    dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
@@ -1214,6 +1230,27 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
                        txg_wait_synced(dmu_objset_pool(rwa->os), 0);
                        object = DMU_NEW_OBJECT;
                }
+
+               /*
+                * For raw receives, free everything beyond the new incoming
+                * maxblkid. Normally this would be done with a DRR_FREE
+                * record that would come after this DRR_OBJECT record is
+                * processed. However, for raw receives we manually set the
+                * maxblkid from the drr_maxblkid and so we must first free
+                * everything above that blkid to ensure the DMU is always
+                * consistent with itself. We will never free the first block
+                * of the object here because a maxblkid of 0 could indicate
+                * an object with a single block or one with no blocks. This
+                * free may be skipped when dmu_free_long_range() was called
+                * above since it covers the entire object's contents.
+                */
+               if (rwa->raw && object != DMU_NEW_OBJECT && !did_free) {
+                       err = dmu_free_long_range(rwa->os, drro->drr_object,
+                           (drro->drr_maxblkid + 1) * doi.doi_data_block_size,
+                           DMU_OBJECT_END);
+                       if (err != 0)
+                               return (SET_ERROR(EINVAL));
+               }
        } else if (err == EEXIST) {
                /*
                 * The object requested is currently an interior slot of a
@@ -1223,7 +1260,12 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
                 * earlier in the stream.
                 */
                txg_wait_synced(dmu_objset_pool(rwa->os), 0);
-               object = drro->drr_object;
+
+               if (dmu_object_info(rwa->os, drro->drr_object, NULL) != ENOENT)
+                       return (SET_ERROR(EINVAL));
+
+               /* object was freed and we are about to allocate a new one */
+               object = DMU_NEW_OBJECT;
        } else {
                /* object is free and we are about to allocate a new one */
                object = DMU_NEW_OBJECT;
@@ -1250,7 +1292,6 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
                                return (err);
 
                        err = dmu_free_long_object(rwa->os, slot);
-
                        if (err != 0)
                                return (err);
 
@@ -1286,6 +1327,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
                    drro->drr_bonustype, drro->drr_bonuslen,
                    dn_slots << DNODE_SHIFT, tx);
        }
+
        if (err != 0) {
                dmu_tx_commit(tx);
                return (SET_ERROR(EINVAL));
@@ -1333,14 +1375,21 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
        /* handle more restrictive dnode structuring for raw recvs */
        if (rwa->raw) {
                /*
-                * Set the indirect block shift and nlevels. This will not fail
-                * because we ensured all of the blocks were free earlier if
-                * this is a new object.
+                * Set the indirect block size, block shift, nlevels.
+                * This will not fail because we ensured all of the
+                * blocks were freed earlier if this is a new object.
+                * For non-new objects block size and indirect block
+                * shift cannot change and nlevels can only increase.
                 */
                VERIFY0(dmu_object_set_blocksize(rwa->os, drro->drr_object,
                    drro->drr_blksz, drro->drr_indblkshift, tx));
                VERIFY0(dmu_object_set_nlevels(rwa->os, drro->drr_object,
                    drro->drr_nlevels, tx));
+
+               /*
+                * Set the maxblkid. This will always succeed because
+                * we freed all blocks beyond the new maxblkid above.
+                */
                VERIFY0(dmu_object_set_maxblkid(rwa->os, drro->drr_object,
                    drro->drr_maxblkid, tx));
        }
@@ -2145,7 +2194,7 @@ dprintf_drr(struct receive_record_arg *rrd, int err)
        {
                struct drr_write *drrw = &rrd->header.drr_u.drr_write;
                dprintf("drr_type = WRITE obj = %llu type = %u offset = %llu "
-                   "lsize = %llu cksumtype = %u cksumflags = %u "
+                   "lsize = %llu cksumtype = %u flags = %u "
                    "compress = %u psize = %llu err = %d\n",
                    drrw->drr_object, drrw->drr_type, drrw->drr_offset,
                    drrw->drr_logical_size, drrw->drr_checksumtype,
@@ -2160,7 +2209,7 @@ dprintf_drr(struct receive_record_arg *rrd, int err)
                dprintf("drr_type = WRITE_BYREF obj = %llu offset = %llu "
                    "length = %llu toguid = %llx refguid = %llx "
                    "refobject = %llu refoffset = %llu cksumtype = %u "
-                   "cksumflags = %u err = %d\n",
+                   "flags = %u err = %d\n",
                    drrwbr->drr_object, drrwbr->drr_offset,
                    drrwbr->drr_length, drrwbr->drr_toguid,
                    drrwbr->drr_refguid, drrwbr->drr_refobject,
@@ -2196,6 +2245,16 @@ dprintf_drr(struct receive_record_arg *rrd, int err)
                    "err = %d\n", drrs->drr_object, drrs->drr_length, err);
                break;
        }
+       case DRR_OBJECT_RANGE:
+       {
+               struct drr_object_range *drror =
+                   &rrd->header.drr_u.drr_object_range;
+               dprintf("drr_type = OBJECT_RANGE firstobj = %llu "
+                   "numslots = %llu flags = %u err = %d\n",
+                   drror->drr_firstobj, drror->drr_numslots,
+                   drror->drr_flags, err);
+               break;
+       }
        default:
                return;
        }
@@ -2279,10 +2338,11 @@ receive_process_record(struct receive_writer_arg *rwa,
        {
                struct drr_object_range *drror =
                    &rrd->header.drr_u.drr_object_range;
-               return (receive_object_range(rwa, drror));
+               err = receive_object_range(rwa, drror);
+               break;
        }
        default:
-               return (SET_ERROR(EINVAL));
+               err = (SET_ERROR(EINVAL));
        }
 
        if (err != 0)
@@ -2486,11 +2546,16 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
                 * the keynvl away until then.
                 */
                err = dsl_crypto_recv_raw(spa_name(ra->os->os_spa),
-                   drc->drc_ds->ds_object, drc->drc_drrb->drr_type,
-                   keynvl, drc->drc_newfs);
+                   drc->drc_ds->ds_object, drc->drc_fromsnapobj,
+                   drc->drc_drrb->drr_type, keynvl, drc->drc_newfs);
                if (err != 0)
                        goto out;
 
+               /* see comment in dmu_recv_end_sync() */
+               drc->drc_ivset_guid = 0;
+               (void) nvlist_lookup_uint64(keynvl, "to_ivset_guid",
+                   &drc->drc_ivset_guid);
+
                if (!drc->drc_newfs)
                        drc->drc_keynvl = fnvlist_dup(keynvl);
        }
@@ -2551,10 +2616,10 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
                    sizeof (struct receive_record_arg) + ra->rrd->payload_size);
                ra->rrd = NULL;
        }
-       if (ra->next_rrd == NULL)
-               ra->next_rrd = kmem_zalloc(sizeof (*ra->next_rrd), KM_SLEEP);
-       ra->next_rrd->eos_marker = B_TRUE;
-       bqueue_enqueue(&rwa->q, ra->next_rrd, 1);
+       ASSERT3P(ra->rrd, ==, NULL);
+       ra->rrd = kmem_zalloc(sizeof (*ra->rrd), KM_SLEEP);
+       ra->rrd->eos_marker = B_TRUE;
+       bqueue_enqueue(&rwa->q, ra->rrd, 1);
 
        mutex_enter(&rwa->mutex);
        while (!rwa->done) {
@@ -2595,6 +2660,14 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
                err = rwa->err;
 
 out:
+       /*
+        * If we hit an error before we started the receive_writer_thread
+        * we need to clean up the next_rrd we create by processing the
+        * DRR_BEGIN record.
+        */
+       if (ra->next_rrd != NULL)
+               kmem_free(ra->next_rrd, sizeof (*ra->next_rrd));
+
        nvlist_free(begin_nvl);
        if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1))
                zfs_onexit_fd_rele(cleanup_fd);
@@ -2798,6 +2871,25 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
                drc->drc_newsnapobj =
                    dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj;
        }
+
+       /*
+        * If this is a raw receive, the crypt_keydata nvlist will include
+        * a to_ivset_guid for us to set on the new snapshot. This value
+        * will override the value generated by the snapshot code. However,
+        * this value may not be present, because older implementations of
+        * the raw send code did not include this value, and we are still
+        * allowed to receive them if the zfs_disable_ivset_guid_check
+        * tunable is set, in which case we will leave the newly-generated
+        * value.
+        */
+       if (drc->drc_raw && drc->drc_ivset_guid != 0) {
+               dmu_object_zapify(dp->dp_meta_objset, drc->drc_newsnapobj,
+                   DMU_OT_DSL_DATASET, tx);
+               VERIFY0(zap_update(dp->dp_meta_objset, drc->drc_newsnapobj,
+                   DS_FIELD_IVSET_GUID, sizeof (uint64_t), 1,
+                   &drc->drc_ivset_guid, tx));
+       }
+
        zvol_create_minors(dp->dp_spa, drc->drc_tofs, B_TRUE);
 
        /*