]> git.proxmox.com Git - mirror_zfs.git/commitdiff
Implement a new type of zfs receive: corrective receive (-c)
authorAlek P <alek-p@users.noreply.github.com>
Thu, 28 Jul 2022 22:52:46 +0000 (18:52 -0400)
committerGitHub <noreply@github.com>
Thu, 28 Jul 2022 22:52:46 +0000 (15:52 -0700)
This type of recv is used to heal corrupted data when a replica
of the data already exists (in the form of a send file for example).
With the provided send stream, corrective receive will read from
disk blocks described by the WRITE records. When any of the reads
come back with ECKSUM we use the data from the corresponding WRITE
record to rewrite the corrupted block.

Reviewed-by: Paul Dagnelie <pcd@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Paul Zuchowski <pzuchowski@datto.com>
Signed-off-by: Alek Pinchuk <apinchuk@axcient.com>
Closes #9372

28 files changed:
cmd/zfs/zfs_main.c
contrib/pyzfs/libzfs_core/__init__.py
contrib/pyzfs/libzfs_core/_error_translation.py
contrib/pyzfs/libzfs_core/_libzfs_core.py
contrib/pyzfs/libzfs_core/bindings/libzfs_core.py
contrib/pyzfs/libzfs_core/test/test_libzfs_core.py
include/libzfs.h
include/libzfs_core.h
include/sys/dmu_recv.h
include/sys/spa.h
include/sys/spa_impl.h
include/sys/zio.h
lib/libzfs/libzfs_sendrecv.c
lib/libzfs_core/libzfs_core.abi
lib/libzfs_core/libzfs_core.c
man/man4/zfs.4
man/man8/zfs-receive.8
module/zfs/dmu.c
module/zfs/dmu_recv.c
module/zfs/spa.c
module/zfs/spa_errlog.c
module/zfs/zfs_ioctl.c
module/zfs/zio.c
tests/runfiles/common.run
tests/zfs-tests/cmd/libzfs_input_check.c
tests/zfs-tests/tests/Makefile.am
tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_compressed_corrective.ksh [new file with mode: 0755]
tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_corrective.ksh [new file with mode: 0755]

index 88aa7c91f1a0627aee409e8ecaa734c0a5abe935..f1d686753c256ffd70e019c16d90a9da4acd9c33 100644 (file)
@@ -4746,7 +4746,7 @@ zfs_do_receive(int argc, char **argv)
                nomem();
 
        /* check options */
-       while ((c = getopt(argc, argv, ":o:x:dehMnuvFsA")) != -1) {
+       while ((c = getopt(argc, argv, ":o:x:dehMnuvFsAc")) != -1) {
                switch (c) {
                case 'o':
                        if (!parseprop(props, optarg)) {
@@ -4802,6 +4802,9 @@ zfs_do_receive(int argc, char **argv)
                case 'A':
                        abort_resumable = B_TRUE;
                        break;
+               case 'c':
+                       flags.heal = B_TRUE;
+                       break;
                case ':':
                        (void) fprintf(stderr, gettext("missing argument for "
                            "'%c' option\n"), optopt);
index 25ea3e495b02cb05a24abbac50b7f80c840afd5b..a80f94b524ecf146c8fce6b50ebb6695c2f5898a 100644 (file)
@@ -72,6 +72,7 @@ from ._libzfs_core import (
     lzc_receive_resumable,
     lzc_receive_with_cmdprops,
     lzc_receive_with_header,
+    lzc_receive_with_heal,
     lzc_release,
     lzc_reopen,
     lzc_rollback,
@@ -127,6 +128,7 @@ __all__ = [
     'lzc_receive_resumable',
     'lzc_receive_with_cmdprops',
     'lzc_receive_with_header',
+    'lzc_receive_with_heal',
     'lzc_release',
     'lzc_reopen',
     'lzc_rollback',
index f494461f63b2068c4f438ab17bd299fc451a71d6..26676db398c51a3e3d37924910151ac1f0354a1a 100644 (file)
@@ -469,6 +469,8 @@ def lzc_receive_translate_errors(
         raise lzc_exc.ReadOnlyPool(_pool_name(snapname))
     if ret == errno.EAGAIN:
         raise lzc_exc.SuspendedPool(_pool_name(snapname))
+    if ret == errno.EACCES:
+        raise lzc_exc.EncryptionKeyNotLoaded()
     if ret == ECKSUM:
         raise lzc_exc.BadStream()
     if ret == ZFS_ERR_WRONG_PARENT:
index fcfa5be31b1fb9b252af1870c79539bf5c7652bd..fa74ad9a760c85a62c1de6436644108a5845f3f2 100644 (file)
@@ -1426,6 +1426,135 @@ def lzc_receive_with_cmdprops(
     return (int(c_read_bytes[0]), action_handle)
 
 
+@_uncommitted()
+def lzc_receive_with_heal(
+    snapname, fd, begin_record, force=False, corrective=True, resumable=False,
+    raw=False, origin=None, props=None, cmdprops=None, key=None, cleanup_fd=-1,
+    action_handle=0
+):
+    '''
+    Like :func:`lzc_receive_with_cmdprops`, but allows the caller to pass an
+    additional 'corrective' argument. The 'corrective' boolean set to true
+    indicates that a corruption healing receive should be performed.
+
+    :param bytes snapname: the name of the snapshot to create.
+    :param int fd: the file descriptor from which to read the stream.
+    :param begin_record: the stream's begin record.
+    :type begin_record: ``cffi`` `CData` representing the dmu_replay_record_t
+        structure.
+    :param bool force: whether to roll back or destroy the target filesystem
+        if that is required to receive the stream.
+    :param bool corrective: whether this stream should be used to heal data.
+    :param bool resumable: whether this stream should be treated as resumable.
+        If the receive fails due to premature stream termination, the
+        intermediate state will be preserved on disk and may subsequently be
+        resumed with :func:`lzc_send_resume`.
+    :param bool raw: whether this is a "raw" stream.
+    :param origin: the optional origin snapshot name if the stream is for a
+        clone.
+    :type origin: bytes or None
+    :param props: the properties to set on the snapshot as *received*
+        properties.
+    :type props: dict of bytes : Any
+    :param cmdprops: the properties to set on the snapshot as local overrides
+        to *received* properties. `bool` values are forcefully inherited while
+        every other value is set locally as if the command "zfs set" was
+        invoked immediately before the receive.
+    :type cmdprops: dict of bytes : Any
+    :param key: raw bytes representing user's wrapping key
+    :type key: bytes
+    :param int cleanup_fd: file descriptor used to set a cleanup-on-exit file
+        descriptor.
+    :param int action_handle: variable used to pass the handle for guid/ds
+        mapping: this should be set to zero on first call and will contain an
+        updated handle on success, it should be passed in subsequent calls.
+
+    :return: a tuple with two elements where the first one is the number of
+        bytes read from the file descriptor and the second one is the
+        action_handle return value.
+
+    :raises IOError: if an input / output error occurs while reading from the
+        ``fd``.
+    :raises DatasetExists: if the snapshot named ``snapname`` already exists.
+    :raises DatasetExists: if the stream is a full stream and the destination
+        filesystem already exists.
+    :raises DatasetExists: if ``force`` is `True` but the destination
+        filesystem could not be rolled back to a matching snapshot because a
+        newer snapshot exists and it is an origin of a cloned filesystem.
+    :raises StreamMismatch: if an incremental stream is received and the latest
+        snapshot of the destination filesystem does not match the source
+        snapshot of the stream.
+    :raises StreamMismatch: if a full stream is received and the destination
+        filesystem already exists and it has at least one snapshot, and
+        ``force`` is `False`.
+    :raises StreamMismatch: if an incremental clone stream is received but the
+        specified ``origin`` is not the actual received origin.
+    :raises DestinationModified: if an incremental stream is received and the
+        destination filesystem has been modified since the last snapshot and
+        ``force`` is `False`.
+    :raises DestinationModified: if a full stream is received and the
+        destination filesystem already exists and it does not have any
+        snapshots, and ``force`` is `False`.
+    :raises DatasetNotFound: if the destination filesystem and its parent do
+        not exist.
+    :raises DatasetNotFound: if the ``origin`` is not `None` and does not
+        exist.
+    :raises DatasetBusy: if ``force`` is `True` but the destination filesystem
+        could not be rolled back to a matching snapshot because a newer
+        snapshot is held and could not be destroyed.
+    :raises DatasetBusy: if another receive operation is being performed on the
+        destination filesystem.
+    :raises EncryptionKeyNotLoaded: if ``corrective`` is `True` and the key
+            required for a non-raw corrective recv on an encrypted dataset
+            is not loaded.
+    :raises BadStream: if ``corrective`` is `True` and the corrective recv
+        was not able to reconstruct a corrupted block.
+    :raises BadStream: if the stream is corrupt or it is not recognized or it
+        is a compound stream or it is a clone stream, but ``origin`` is `None`.
+    :raises BadStream: if a clone stream is received and the destination
+        filesystem already exists.
+    :raises StreamFeatureNotSupported: if ``corrective`` is `True` and the
+        stream is not compatible with the data in the pool.
+    :raises StreamFeatureNotSupported: if the stream has a feature that is not
+        supported on this side.
+    :raises ReceivePropertyFailure: if one or more of the specified properties
+        is invalid or has an invalid type or value.
+    :raises NameInvalid: if the name of either snapshot is invalid.
+    :raises NameTooLong: if the name of either snapshot is too long.
+    '''
+
+    if origin is not None:
+        c_origin = origin
+    else:
+        c_origin = _ffi.NULL
+    if action_handle is not None:
+        c_action_handle = _ffi.new("uint64_t *")
+    else:
+        c_action_handle = _ffi.NULL
+    c_read_bytes = _ffi.new("uint64_t *")
+    c_errflags = _ffi.new("uint64_t *")
+    if props is None:
+        props = {}
+    if cmdprops is None:
+        cmdprops = {}
+    if key is None:
+        key = b""
+    else:
+        key = bytes(key)
+
+    nvlist = nvlist_in(props)
+    cmdnvlist = nvlist_in(cmdprops)
+    properrs = {}
+    with nvlist_out(properrs) as c_errors:
+        ret = _lib.lzc_receive_with_heal(
+            snapname, nvlist, cmdnvlist, key, len(key), c_origin,
+            force, corrective, resumable, raw, fd, begin_record, cleanup_fd,
+            c_read_bytes, c_errflags, c_action_handle, c_errors)
+    errors.lzc_receive_translate_errors(
+        ret, snapname, fd, force, raw, False, False, origin, properrs)
+    return (int(c_read_bytes[0]), action_handle)
+
+
 @_uncommitted()
 def lzc_reopen(poolname, restart=True):
     '''
index 1b46a08919442ec8e69fbb0936ac298326b34344..bcb9ed379e21d8688e6baa5864a05cf781612b46 100644 (file)
@@ -112,6 +112,10 @@ CDEF = """
         uint8_t *, uint_t, const char *, boolean_t, boolean_t,
         boolean_t, int, const dmu_replay_record_t *, int, uint64_t *,
         uint64_t *, uint64_t *, nvlist_t **);
+    int lzc_receive_with_heal(const char *, nvlist_t *, nvlist_t *,
+        uint8_t *, uint_t, const char *, boolean_t, boolean_t, boolean_t,
+        boolean_t, int, const dmu_replay_record_t *, int, uint64_t *,
+        uint64_t *, uint64_t *, nvlist_t **);
     int lzc_receive_with_header(const char *, nvlist_t *, const char *,
         boolean_t, boolean_t, boolean_t, int, const dmu_replay_record_t *);
     int lzc_release(nvlist_t *, nvlist_t **);
index 9b1aea193f44a94da073fa957d1174145b403071..c94ae6de6bbf1856257178818115d2c380a61f26 100644 (file)
@@ -2911,6 +2911,27 @@ class ZFSTest(unittest.TestCase):
             self.assertEqual(fs.getProperty("compression"), b"on")
             self.assertEqual(fs.getProperty("ns:prop"), b"val")
 
+    def test_recv_with_heal(self):
+        snap = ZFSTest.pool.makeName(b"fs1@snap1")
+        fs = ZFSTest.pool.getFilesystem(b"fs1")
+        props = {}
+        cmdprops = {
+            b"compression": 0x01,
+            b"ns:prop": b"val"
+        }
+
+        lzc.lzc_snapshot([snap])
+        with tempfile.TemporaryFile(suffix='.zstream') as stream:
+            lzc.lzc_send(snap, None, stream.fileno())
+            stream.seek(0)
+            (header, c_header) = lzc.receive_header(stream.fileno())
+            lzc.lzc_receive_with_heal(
+                snap, stream.fileno(), c_header, props=props,
+                cmdprops=cmdprops)
+            self.assertExists(snap)
+            self.assertEqual(fs.getProperty("compression"), b"on")
+            self.assertEqual(fs.getProperty("ns:prop"), b"val")
+
     def test_recv_with_cmdprops_and_recvprops(self):
         fromsnap = ZFSTest.pool.makeName(b"fs1@snap1")
         fs = ZFSTest.pool.getFilesystem(b"recv")
index 52e59ac6500aadfeb324b4333e019da07014897c..4948cd0d34c8bb62afafc22eb5ed5000bc916fd9 100644 (file)
@@ -829,6 +829,9 @@ typedef struct recvflags {
 
        /* force unmount while recv snapshot (private) */
        boolean_t forceunmount;
+
+       /* use this recv to check (and heal if needed) an existing snapshot */
+       boolean_t heal;
 } recvflags_t;
 
 _LIBZFS_H int zfs_receive(libzfs_handle_t *, const char *, nvlist_t *,
index 926d11eb563808bef3dc00d8c5d04a1ddc048568..14a4857c35daf2c7f87459d9859b6a8cda716759 100644 (file)
@@ -21,9 +21,9 @@
 
 /*
  * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
- * Copyright (c) 2017 Datto Inc.
  * Copyright 2017 RackTop Systems.
  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+ * Copyright (c) 2019 Datto Inc.
  */
 
 #ifndef        _LIBZFS_CORE_H
@@ -114,6 +114,10 @@ _LIBZFS_CORE_H int lzc_receive_with_cmdprops(const char *, nvlist_t *,
     nvlist_t *, uint8_t *, uint_t, const char *, boolean_t, boolean_t,
     boolean_t, int, const struct dmu_replay_record *, int, uint64_t *,
     uint64_t *, uint64_t *, nvlist_t **);
+_LIBZFS_CORE_H int lzc_receive_with_heal(const char *, nvlist_t *, nvlist_t *,
+    uint8_t *, uint_t, const char *, boolean_t, boolean_t, boolean_t, boolean_t,
+    int, const struct dmu_replay_record *, int, uint64_t *, uint64_t *,
+    uint64_t *, nvlist_t **);
 _LIBZFS_CORE_H int lzc_send_space(const char *, const char *,
     enum lzc_send_flags, uint64_t *);
 _LIBZFS_CORE_H int lzc_send_space_resume_redacted(const char *, const char *,
index 41a65e827a8037352ed11d500723db70315f89f0..538c73610a59ed13fe9cb058edb079cfa259865f 100644 (file)
@@ -24,6 +24,7 @@
  * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2019 Datto Inc.
  */
 
 #ifndef _DMU_RECV_H
@@ -47,6 +48,7 @@ typedef struct dmu_recv_cookie {
        boolean_t drc_byteswap;
        uint64_t drc_featureflags;
        boolean_t drc_force;
+       boolean_t drc_heal;
        boolean_t drc_resumable;
        boolean_t drc_should_save;
        boolean_t drc_raw;
@@ -78,7 +80,7 @@ typedef struct dmu_recv_cookie {
 } dmu_recv_cookie_t;
 
 int dmu_recv_begin(char *, char *, dmu_replay_record_t *,
-    boolean_t, boolean_t, nvlist_t *, nvlist_t *, char *,
+    boolean_t, boolean_t, boolean_t, nvlist_t *, nvlist_t *, char *,
     dmu_recv_cookie_t *, zfs_file_t *, offset_t *);
 int dmu_recv_stream(dmu_recv_cookie_t *, offset_t *);
 int dmu_recv_end(dmu_recv_cookie_t *, void *);
index b53439a82c0025a3413c144696e5b5dff55fc844..e185ce6b1d8ea5fd0b8e2791112b858b50ac3c62 100644 (file)
  * Copyright 2013 Saso Kiselkov. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  * Copyright 2017 Joyent, Inc.
- * Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
  * Copyright (c) 2017, Intel Corporation.
  * Copyright (c) 2019, Allan Jude
  * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Datto Inc.
  */
 
 #ifndef _SYS_SPA_H
@@ -1134,6 +1134,7 @@ extern const char *spa_state_to_name(spa_t *spa);
 /* error handling */
 struct zbookmark_phys;
 extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb);
+extern void spa_remove_error(spa_t *spa, zbookmark_phys_t *zb);
 extern int zfs_ereport_post(const char *clazz, spa_t *spa, vdev_t *vd,
     const zbookmark_phys_t *zb, zio_t *zio, uint64_t state);
 extern boolean_t zfs_ereport_is_valid(const char *clazz, spa_t *spa, vdev_t *vd,
index 3fa9c80d16b8a2aeefaf6bb72fb4ec9f5045a4b4..469b1266e453519e830bfa3fe56a8b3253cf2fcb 100644 (file)
@@ -25,8 +25,8 @@
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright 2013 Saso Kiselkov. All rights reserved.
  * Copyright (c) 2016 Actifio, Inc. All rights reserved.
- * Copyright (c) 2017 Datto Inc.
  * Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2019 Datto Inc.
  */
 
 #ifndef _SYS_SPA_IMPL_H
@@ -349,6 +349,7 @@ struct spa {
        kmutex_t        spa_errlist_lock;       /* error list/ereport lock */
        avl_tree_t      spa_errlist_last;       /* last error list */
        avl_tree_t      spa_errlist_scrub;      /* scrub error list */
+       avl_tree_t      spa_errlist_healed;     /* list of healed blocks */
        uint64_t        spa_deflate;            /* should we deflate? */
        uint64_t        spa_history;            /* history object */
        kmutex_t        spa_history_lock;       /* history lock */
index b6f8da76046502aab82a1733f710e6b738031a57..23fdda457bc3989e92a2d75a8eed399a3062efc4 100644 (file)
@@ -534,6 +534,8 @@ extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
 extern zio_t *zio_root(spa_t *spa,
     zio_done_func_t *done, void *priv, enum zio_flag flags);
 
+extern void zio_destroy(zio_t *zio);
+
 extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
     struct abd *data, uint64_t lsize, zio_done_func_t *done, void *priv,
     zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb);
index d0c90899a210792969ad572bc0fdbeb96599915e..640051e3b029110662b5a703aa9c365b072e4fde 100644 (file)
@@ -436,6 +436,29 @@ send_iterate_prop(zfs_handle_t *zhp, boolean_t received_only, nvlist_t *nv)
        }
 }
 
+/*
+ * returns snapshot guid
+ * and returns 0 if the snapshot does not exist
+ */
+static uint64_t
+get_snap_guid(libzfs_handle_t *hdl, const char *fs, const char *snap)
+{
+       char name[MAXPATHLEN + 1];
+       uint64_t guid = 0;
+
+       if (fs == NULL || fs[0] == '\0' || snap == NULL || snap[0] == '\0')
+               return (guid);
+
+       (void) snprintf(name, sizeof (name), "%s@%s", fs, snap);
+       zfs_handle_t *zhp = zfs_open(hdl, name, ZFS_TYPE_SNAPSHOT);
+       if (zhp != NULL) {
+               guid = zfs_prop_get_int(zhp, ZFS_PROP_GUID);
+               zfs_close(zhp);
+       }
+
+       return (guid);
+}
+
 /*
  * returns snapshot creation txg
  * and returns 0 if the snapshot does not exist
@@ -4541,9 +4564,34 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
        redacted = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
            DMU_BACKUP_FEATURE_REDACTED;
 
-       if (zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
+       if (flags->heal) {
+               if (flags->isprefix || flags->istail || flags->force ||
+                   flags->canmountoff || flags->resumable || flags->nomount ||
+                   flags->skipholds) {
+                       zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+                           "corrective recv can not be used when combined with"
+                           " this flag"));
+                       err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
+                       goto out;
+               }
+               uint64_t guid =
+                   get_snap_guid(hdl, name, strchr(destsnap, '@') + 1);
+               if (guid == 0) {
+                       zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+                           "corrective recv must specify an existing snapshot"
+                           " to heal"));
+                       err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
+                       goto out;
+               } else if (guid != drrb->drr_toguid) {
+                       zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+                           "local snapshot doesn't match the snapshot"
+                           " in the provided stream"));
+                       err = zfs_error(hdl, EZFS_WRONG_PARENT, errbuf);
+                       goto out;
+               }
+       } else if (zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
                zfs_cmd_t zc = {"\0"};
-               zfs_handle_t *zhp;
+               zfs_handle_t *zhp = NULL;
                boolean_t encrypted;
 
                (void) strcpy(zc.zc_name, name);
@@ -4737,8 +4785,9 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
        }
 
        if (flags->verbose) {
-               (void) printf("%s %s stream of %s into %s\n",
+               (void) printf("%s %s%s stream of %s into %s\n",
                    flags->dryrun ? "would receive" : "receiving",
+                   flags->heal ? " corrective" : "",
                    drrb->drr_fromguid ? "incremental" : "full",
                    drrb->drr_toname, destsnap);
                (void) fflush(stdout);
@@ -4808,10 +4857,17 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
                goto out;
        }
 
-       err = ioctl_err = lzc_receive_with_cmdprops(destsnap, rcvprops,
-           oxprops, wkeydata, wkeylen, origin, flags->force, flags->resumable,
-           raw, infd, drr_noswap, -1, &read_bytes, &errflags,
-           NULL, &prop_errors);
+       if (flags->heal) {
+               err = ioctl_err = lzc_receive_with_heal(destsnap, rcvprops,
+                   oxprops, wkeydata, wkeylen, origin, flags->force,
+                   flags->heal, flags->resumable, raw, infd, drr_noswap, -1,
+                   &read_bytes, &errflags, NULL, &prop_errors);
+       } else {
+               err = ioctl_err = lzc_receive_with_cmdprops(destsnap, rcvprops,
+                   oxprops, wkeydata, wkeylen, origin, flags->force,
+                   flags->resumable, raw, infd, drr_noswap, -1, &read_bytes,
+                   &errflags, NULL, &prop_errors);
+       }
        ioctl_errno = ioctl_err;
        prop_errflags = errflags;
 
@@ -4933,7 +4989,12 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
                        (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
                        break;
                case EACCES:
-                       if (raw && stream_wantsnewfs) {
+                       if (flags->heal) {
+                               zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+                                   "key must be loaded to do a non-raw "
+                                   "corrective recv on an encrypted "
+                                   "dataset."));
+                       } else if (raw && stream_wantsnewfs) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "failed to create encryption key"));
                        } else if (raw && !stream_wantsnewfs) {
@@ -4973,8 +5034,14 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
                        break;
                case ECKSUM:
                case ZFS_ERR_STREAM_TRUNCATED:
-                       recv_ecksum_set_aux(hdl, destsnap, flags->resumable,
-                           ioctl_err == ECKSUM);
+                       if (flags->heal)
+                               zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+                                   "corrective receive was not able to "
+                                   "reconstruct the data needed for "
+                                   "healing."));
+                       else
+                               recv_ecksum_set_aux(hdl, destsnap,
+                                   flags->resumable, ioctl_err == ECKSUM);
                        (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
                        break;
                case ZFS_ERR_STREAM_LARGE_BLOCK_MISMATCH:
@@ -4984,8 +5051,14 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
                        (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
                        break;
                case ENOTSUP:
-                       zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-                           "pool must be upgraded to receive this stream."));
+                       if (flags->heal)
+                               zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+                                   "stream is not compatible with the "
+                                   "data in the pool."));
+                       else
+                               zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+                                   "pool must be upgraded to receive this "
+                                   "stream."));
                        (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
                        break;
                case EDQUOT:
index fae98469a04f1411d436d389c9a39fe2ee9dda88..7e340e1d4cc5654292f7fa2865213812d65b496b 100644 (file)
     <elf-symbol name='lzc_receive_resumable' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='lzc_receive_with_cmdprops' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='lzc_receive_with_header' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_receive_with_heal' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='lzc_redact' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='lzc_release' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='lzc_rename' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
       <parameter type-id='8341348b' name='begin_record'/>
       <return type-id='95e97e5e'/>
     </function-decl>
+    <function-decl name='lzc_receive_with_heal' mangled-name='lzc_receive_with_heal' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_receive_with_heal'>
+      <parameter type-id='80f4b756' name='snapname'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <parameter type-id='5ce45b60' name='cmdprops'/>
+      <parameter type-id='ae3e8ca6' name='wkeydata'/>
+      <parameter type-id='3502e3ff' name='wkeylen'/>
+      <parameter type-id='80f4b756' name='origin'/>
+      <parameter type-id='c19b74c3' name='force'/>
+      <parameter type-id='c19b74c3' name='heal'/>
+      <parameter type-id='c19b74c3' name='resumable'/>
+      <parameter type-id='c19b74c3' name='raw'/>
+      <parameter type-id='95e97e5e' name='input_fd'/>
+      <parameter type-id='8341348b' name='begin_record'/>
+      <parameter type-id='95e97e5e' name='cleanup_fd'/>
+      <parameter type-id='5d6479ae' name='read_bytes'/>
+      <parameter type-id='5d6479ae' name='errflags'/>
+      <parameter type-id='5d6479ae' name='action_handle'/>
+      <parameter type-id='857bb57e' name='errors'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
     <function-decl name='lzc_receive_one' mangled-name='lzc_receive_one' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_receive_one'>
       <parameter type-id='80f4b756' name='snapname'/>
       <parameter type-id='5ce45b60' name='props'/>
index d29133ab3fed17d7e0008c83d58770543d719662..16bd9af1bbc8de5ec6809a0cc9c881c8959df789 100644 (file)
 /*
  * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
- * Copyright (c) 2017 Datto Inc.
  * Copyright 2017 RackTop Systems.
  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
  * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
+ * Copyright (c) 2019 Datto Inc.
  */
 
 /*
@@ -986,7 +986,7 @@ recv_read(int fd, void *buf, int ilen)
 static int
 recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
     uint8_t *wkeydata, uint_t wkeylen, const char *origin, boolean_t force,
-    boolean_t resumable, boolean_t raw, int input_fd,
+    boolean_t heal, boolean_t resumable, boolean_t raw, int input_fd,
     const dmu_replay_record_t *begin_record, uint64_t *read_bytes,
     uint64_t *errflags, nvlist_t **errors)
 {
@@ -1041,7 +1041,7 @@ recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
        /*
         * All receives with a payload should use the new interface.
         */
-       if (resumable || raw || wkeydata != NULL || payload) {
+       if (resumable || heal || raw || wkeydata != NULL || payload) {
                nvlist_t *outnvl = NULL;
                nvlist_t *innvl = fnvlist_alloc();
 
@@ -1081,6 +1081,8 @@ recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
                if (resumable)
                        fnvlist_add_boolean(innvl, "resumable");
 
+               if (heal)
+                       fnvlist_add_boolean(innvl, "heal");
 
                error = lzc_ioctl(ZFS_IOC_RECV_NEW, fsname, innvl, &outnvl);
 
@@ -1180,7 +1182,7 @@ lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
     boolean_t force, boolean_t raw, int fd)
 {
        return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
-           B_FALSE, raw, fd, NULL, NULL, NULL, NULL));
+           B_FALSE, B_FALSE, raw, fd, NULL, NULL, NULL, NULL));
 }
 
 /*
@@ -1194,7 +1196,7 @@ lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
     boolean_t force, boolean_t raw, int fd)
 {
        return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
-           B_TRUE, raw, fd, NULL, NULL, NULL, NULL));
+           B_FALSE, B_TRUE, raw, fd, NULL, NULL, NULL, NULL));
 }
 
 /*
@@ -1217,7 +1219,7 @@ lzc_receive_with_header(const char *snapname, nvlist_t *props,
                return (EINVAL);
 
        return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
-           resumable, raw, fd, begin_record, NULL, NULL, NULL));
+           B_FALSE, resumable, raw, fd, begin_record, NULL, NULL, NULL));
 }
 
 /*
@@ -1247,7 +1249,7 @@ lzc_receive_one(const char *snapname, nvlist_t *props,
 {
        (void) action_handle, (void) cleanup_fd;
        return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
-           resumable, raw, input_fd, begin_record,
+           B_FALSE, resumable, raw, input_fd, begin_record,
            read_bytes, errflags, errors));
 }
 
@@ -1269,7 +1271,27 @@ lzc_receive_with_cmdprops(const char *snapname, nvlist_t *props,
 {
        (void) action_handle, (void) cleanup_fd;
        return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin,
-           force, resumable, raw, input_fd, begin_record,
+           force, B_FALSE, resumable, raw, input_fd, begin_record,
+           read_bytes, errflags, errors));
+}
+
+/*
+ * Like lzc_receive_with_cmdprops, but allows the caller to pass an additional
+ * 'heal' argument.
+ *
+ * The heal argument tells us to heal the provided snapshot using the provided
+ * send stream
+ */
+int lzc_receive_with_heal(const char *snapname, nvlist_t *props,
+    nvlist_t *cmdprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin,
+    boolean_t force, boolean_t heal, boolean_t resumable, boolean_t raw,
+    int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd,
+    uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
+    nvlist_t **errors)
+{
+       (void) action_handle, (void) cleanup_fd;
+       return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin,
+           force, heal, resumable, raw, input_fd, begin_record,
            read_bytes, errflags, errors));
 }
 
index 0e208d279504457256295b46e86b03dade3ce74d..cc55ee32ba24d63a1396d5aa4498484759106fa4 100644 (file)
@@ -1870,6 +1870,17 @@ This setting will not reduce the write size below a single block.
 Capped at a maximum of
 .Sy 32 MiB .
 .
+.It Sy zfs_recv_best_effort_corrective Ns = Ns Sy 0 Pq int
+When this variable is set to non-zero a corrective receive:
+.Bl -enum -compact -offset 4n -width "1."
+.It
+Does not enforce the restriction of source & destination snapshot GUIDs
+matching.
+.It
+If there is an error during healing, the healing receive is not
+terminated; instead it moves on to the next record.
+.El
+.
 .It Sy zfs_override_estimate_recordsize Ns = Ns Sy 0 Ns | Ns 1 Pq ulong
 Setting this variable overrides the default logic for estimating block
 sizes when doing a
index b063b1e73dbb2c4f14e3a03f98c35fbd6381f9ec..22cb567c1c96b06ccb2e8fdc1a45d21ce0a7c490 100644 (file)
@@ -29,7 +29,7 @@
 .\" Copyright 2018 Nexenta Systems, Inc.
 .\" Copyright 2019 Joyent, Inc.
 .\"
-.Dd March 16, 2022
+.Dd April 26, 2022
 .Dt ZFS-RECEIVE 8
 .Os
 .
 .Fl A
 .Ar filesystem Ns | Ns Ar volume
 .
+.Nm
+.Cm receive
+.Fl c
+.Op Fl vn
+.Ar filesystem Ns | Ns Ar snapshot
+.
 .Sh DESCRIPTION
 .Bl -tag -width ""
 .It Xo
@@ -393,6 +399,24 @@ restrictions (e.g. set-once) apply equally to
 Abort an interrupted
 .Nm zfs Cm receive Fl s ,
 deleting its saved partially received state.
+.It Xo
+.Nm zfs
+.Cm receive
+.Fl c
+.Op Fl vn
+.Ar filesystem Ns | Ns Ar snapshot
+.Xc
+Attempt to correct data corruption in the specified dataset,
+by using the provided stream as the source of healthy data.
+This method of healing can only heal data blocks present in the stream.
Metadata cannot be healed by corrective receive.
+Running a scrub is recommended post-healing to ensure all corruption was
+healed.
+.Pp
It's important to consider why corruption has happened in the first place,
since if you have slowly failing hardware, periodically healing the data
is not going to save you from data loss later on when the hardware fails
completely.
 .El
 .
 .Sh EXAMPLES
index a2c9bb556abdc1f5c7d9eb4f15390fb03fa93359..58c88c7d78540b99ecef5b82b932436a3b0d5b07 100644 (file)
@@ -520,6 +520,9 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
        dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT |
            DB_RF_NOPREFETCH;
 
+       if ((flags & DMU_READ_NO_DECRYPT) != 0)
+               dbuf_flags |= DB_RF_NO_DECRYPT;
+
        rw_enter(&dn->dn_struct_rwlock, RW_READER);
        if (dn->dn_datablkshift) {
                int blkshift = dn->dn_datablkshift;
index a8f511061fb232e80602d11336cb27446ac06582..55d03677feaa9d3e2d8d57b4c541387fa2f4caec 100644 (file)
  * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
  * Copyright (c) 2019, Klara Inc.
  * Copyright (c) 2019, Allan Jude
+ * Copyright (c) 2019 Datto Inc.
+ * Copyright (c) 2022 Axcient.
  */
 
+#include <sys/spa_impl.h>
 #include <sys/dmu.h>
 #include <sys/dmu_impl.h>
 #include <sys/dmu_send.h>
@@ -67,6 +70,7 @@
 static int zfs_recv_queue_length = SPA_MAXBLOCKSIZE;
 static int zfs_recv_queue_ff = 20;
 static int zfs_recv_write_batch_size = 1024 * 1024;
+static int zfs_recv_best_effort_corrective = 0;
 
 static const void *const dmu_recv_tag = "dmu_recv_tag";
 const char *const recv_clone_name = "%recv";
@@ -102,6 +106,8 @@ struct receive_writer_arg {
        boolean_t done;
 
        int err;
+       const char *tofs;
+       boolean_t heal;
        boolean_t resumable;
        boolean_t raw;   /* DMU_BACKUP_FEATURE_RAW set */
        boolean_t spill; /* DRR_FLAG_SPILL_BLOCK set */
@@ -121,6 +127,7 @@ struct receive_writer_arg {
        uint8_t or_iv[ZIO_DATA_IV_LEN];
        uint8_t or_mac[ZIO_DATA_MAC_LEN];
        boolean_t or_byteorder;
+       zio_t *heal_pio;
 };
 
 typedef struct dmu_recv_begin_arg {
@@ -343,9 +350,10 @@ static int
 recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
     uint64_t fromguid, uint64_t featureflags)
 {
-       uint64_t val;
+       uint64_t obj;
        uint64_t children;
        int error;
+       dsl_dataset_t *snap;
        dsl_pool_t *dp = ds->ds_dir->dd_pool;
        boolean_t encrypted = ds->ds_dir->dd_crypto_obj != 0;
        boolean_t raw = (featureflags & DMU_BACKUP_FEATURE_RAW) != 0;
@@ -354,7 +362,7 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
        /* Temporary clone name must not exist. */
        error = zap_lookup(dp->dp_meta_objset,
            dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, recv_clone_name,
-           8, 1, &val);
+           8, 1, &obj);
        if (error != ENOENT)
                return (error == 0 ? SET_ERROR(EBUSY) : error);
 
@@ -362,12 +370,16 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
        if (dsl_dataset_has_resume_receive_state(ds))
                return (SET_ERROR(EBUSY));
 
-       /* New snapshot name must not exist. */
+       /* New snapshot name must not exist if we're not healing it. */
        error = zap_lookup(dp->dp_meta_objset,
            dsl_dataset_phys(ds)->ds_snapnames_zapobj,
-           drba->drba_cookie->drc_tosnap, 8, 1, &val);
-       if (error != ENOENT)
+           drba->drba_cookie->drc_tosnap, 8, 1, &obj);
+       if (drba->drba_cookie->drc_heal) {
+               if (error != 0)
+                       return (error);
+       } else if (error != ENOENT) {
                return (error == 0 ? SET_ERROR(EEXIST) : error);
+       }
 
        /* Must not have children if receiving a ZVOL. */
        error = zap_count(dp->dp_meta_objset,
@@ -392,8 +404,40 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
        if (error != 0)
                return (error);
 
-       if (fromguid != 0) {
-               dsl_dataset_t *snap;
+       if (drba->drba_cookie->drc_heal) {
+               /* Encryption is incompatible with embedded data. */
+               if (encrypted && embed)
+                       return (SET_ERROR(EINVAL));
+
+               /* Healing is not supported when in 'force' mode. */
+               if (drba->drba_cookie->drc_force)
+                       return (SET_ERROR(EINVAL));
+
+               /* Must have keys loaded if doing encrypted non-raw recv. */
+               if (encrypted && !raw) {
+                       if (spa_keystore_lookup_key(dp->dp_spa, ds->ds_object,
+                           NULL, NULL) != 0)
+                               return (SET_ERROR(EACCES));
+               }
+
+               error = dsl_dataset_hold_obj(dp, obj, FTAG, &snap);
+               if (error != 0)
+                       return (error);
+
+               /*
+                * When not doing best effort corrective recv, healing can only
+                * be done if the send stream is for the same snapshot as the
+                * one we are trying to heal.
+                */
+               if (zfs_recv_best_effort_corrective == 0 &&
+                   drba->drba_cookie->drc_drrb->drr_toguid !=
+                   dsl_dataset_phys(snap)->ds_guid) {
+                       dsl_dataset_rele(snap, FTAG);
+                       return (SET_ERROR(ENOTSUP));
+               }
+               dsl_dataset_rele(snap, FTAG);
+       } else if (fromguid != 0) {
+               /* Sanity check the incremental recv */
                uint64_t obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
 
                /* Can't perform a raw receive on top of a non-raw receive */
@@ -459,7 +503,7 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
 
                dsl_dataset_rele(snap, FTAG);
        } else {
-               /* if full, then must be forced */
+               /* If full and not healing then must be forced. */
                if (!drba->drba_cookie->drc_force)
                        return (SET_ERROR(EEXIST));
 
@@ -626,6 +670,10 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
                char buf[ZFS_MAX_DATASET_NAME_LEN];
                objset_t *os;
 
+               /* healing recv must be done "into" an existing snapshot */
+               if (drba->drba_cookie->drc_heal == B_TRUE)
+                       return (SET_ERROR(ENOTSUP));
+
                /*
                 * If it's a non-clone incremental, we are missing the
                 * target fs, so fail the recv.
@@ -807,7 +855,7 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
 
        error = dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds);
        if (error == 0) {
-               /* create temporary clone */
+               /* Create temporary clone unless we're doing corrective recv */
                dsl_dataset_t *snap = NULL;
 
                if (drba->drba_cookie->drc_fromsnapobj != 0) {
@@ -815,8 +863,15 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
                            drba->drba_cookie->drc_fromsnapobj, FTAG, &snap));
                        ASSERT3P(dcp, ==, NULL);
                }
-               dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name,
-                   snap, crflags, drba->drba_cred, dcp, tx);
+               if (drc->drc_heal) {
+                       /* When healing we want to use the provided snapshot */
+                       VERIFY0(dsl_dataset_snap_lookup(ds, drc->drc_tosnap,
+                           &dsobj));
+               } else {
+                       dsobj = dsl_dataset_create_sync(ds->ds_dir,
+                           recv_clone_name, snap, crflags, drba->drba_cred,
+                           dcp, tx);
+               }
                if (drba->drba_cookie->drc_fromsnapobj != 0)
                        dsl_dataset_rele(snap, FTAG);
                dsl_dataset_rele_flags(ds, dsflags, FTAG);
@@ -933,7 +988,8 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
         */
        rrw_enter(&newds->ds_bp_rwlock, RW_READER, FTAG);
        if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds)) &&
-           (featureflags & DMU_BACKUP_FEATURE_RAW) == 0) {
+           (featureflags & DMU_BACKUP_FEATURE_RAW) == 0 &&
+           !drc->drc_heal) {
                (void) dmu_objset_create_impl(dp->dp_spa,
                    newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx);
        }
@@ -1141,7 +1197,7 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx)
  */
 int
 dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin,
-    boolean_t force, boolean_t resumable, nvlist_t *localprops,
+    boolean_t force, boolean_t heal, boolean_t resumable, nvlist_t *localprops,
     nvlist_t *hidden_args, char *origin, dmu_recv_cookie_t *drc,
     zfs_file_t *fp, offset_t *voffp)
 {
@@ -1154,6 +1210,7 @@ dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin,
        drc->drc_tosnap = tosnap;
        drc->drc_tofs = tofs;
        drc->drc_force = force;
+       drc->drc_heal = heal;
        drc->drc_resumable = resumable;
        drc->drc_cred = CRED();
        drc->drc_proc = curproc;
@@ -1243,6 +1300,182 @@ dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin,
        return (err);
 }
 
+/*
+ * Holds data needed for the corrective recv callback
+ */
+typedef struct cr_cb_data {
+       uint64_t size;
+       zbookmark_phys_t zb;
+       spa_t *spa;
+} cr_cb_data_t;
+
+static void
+corrective_read_done(zio_t *zio)
+{
+       cr_cb_data_t *data = zio->io_private;
+       /* Corruption corrected; update error log if needed */
+       if (zio->io_error == 0)
+               spa_remove_error(data->spa, &data->zb);
+       kmem_free(data, sizeof (cr_cb_data_t));
+       abd_free(zio->io_abd);
+}
+
+/*
+ * zio_rewrite the data pointed to by bp with the data from the rrd's abd.
+ */
+static int
+do_corrective_recv(struct receive_writer_arg *rwa, struct drr_write *drrw,
+    struct receive_record_arg *rrd, blkptr_t *bp)
+{
+       int err;
+       zio_t *io;
+       zbookmark_phys_t zb;
+       dnode_t *dn;
+       abd_t *abd = rrd->abd;
+       zio_cksum_t bp_cksum = bp->blk_cksum;
+       enum zio_flag flags = ZIO_FLAG_SPECULATIVE |
+           ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_RETRY | ZIO_FLAG_CANFAIL;
+
+       if (rwa->raw)
+               flags |= ZIO_FLAG_RAW;
+
+       err = dnode_hold(rwa->os, drrw->drr_object, FTAG, &dn);
+       if (err != 0)
+               return (err);
+       SET_BOOKMARK(&zb, dmu_objset_id(rwa->os), drrw->drr_object, 0,
+           dbuf_whichblock(dn, 0, drrw->drr_offset));
+       dnode_rele(dn, FTAG);
+
+       if (!rwa->raw && DRR_WRITE_COMPRESSED(drrw)) {
+               /* Decompress the stream data */
+               abd_t *dabd = abd_alloc_linear(
+                   drrw->drr_logical_size, B_FALSE);
+               err = zio_decompress_data(drrw->drr_compressiontype,
+                   abd, abd_to_buf(dabd), abd_get_size(abd),
+                   abd_get_size(dabd), NULL);
+
+               if (err != 0) {
+                       abd_free(dabd);
+                       return (err);
+               }
+               /* Swap in the newly decompressed data into the abd */
+               abd_free(abd);
+               abd = dabd;
+       }
+
+       if (!rwa->raw && BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF) {
+               /* Recompress the data */
+               abd_t *cabd = abd_alloc_linear(BP_GET_PSIZE(bp),
+                   B_FALSE);
+               uint64_t csize = zio_compress_data(BP_GET_COMPRESS(bp),
+                   abd, abd_to_buf(cabd), abd_get_size(abd),
+                   rwa->os->os_complevel);
+               abd_zero_off(cabd, csize, BP_GET_PSIZE(bp) - csize);
+               /* Swap in newly compressed data into the abd */
+               abd_free(abd);
+               abd = cabd;
+               flags |= ZIO_FLAG_RAW_COMPRESS;
+       }
+
+       /*
+        * The stream is not encrypted but the data on-disk is.
+        * We need to re-encrypt the buf using the same
+        * encryption type, salt, iv, and mac that was used to encrypt
+        * the block previously.
+        */
+       if (!rwa->raw && BP_USES_CRYPT(bp)) {
+               dsl_dataset_t *ds;
+               dsl_crypto_key_t *dck = NULL;
+               uint8_t salt[ZIO_DATA_SALT_LEN];
+               uint8_t iv[ZIO_DATA_IV_LEN];
+               uint8_t mac[ZIO_DATA_MAC_LEN];
+               boolean_t no_crypt = B_FALSE;
+               dsl_pool_t *dp = dmu_objset_pool(rwa->os);
+               abd_t *eabd = abd_alloc_linear(BP_GET_PSIZE(bp), B_FALSE);
+
+               zio_crypt_decode_params_bp(bp, salt, iv);
+               zio_crypt_decode_mac_bp(bp, mac);
+
+               dsl_pool_config_enter(dp, FTAG);
+               err = dsl_dataset_hold_flags(dp, rwa->tofs,
+                   DS_HOLD_FLAG_DECRYPT, FTAG, &ds);
+               if (err != 0) {
+                       dsl_pool_config_exit(dp, FTAG);
+                       abd_free(eabd);
+                       return (SET_ERROR(EACCES));
+               }
+
+               /* Look up the key from the spa's keystore */
+               err = spa_keystore_lookup_key(rwa->os->os_spa,
+                   zb.zb_objset, FTAG, &dck);
+               if (err != 0) {
+                       dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT,
+                           FTAG);
+                       dsl_pool_config_exit(dp, FTAG);
+                       abd_free(eabd);
+                       return (SET_ERROR(EACCES));
+               }
+
+               err = zio_do_crypt_abd(B_TRUE, &dck->dck_key,
+                   BP_GET_TYPE(bp), BP_SHOULD_BYTESWAP(bp), salt, iv,
+                   mac, abd_get_size(abd), abd, eabd, &no_crypt);
+
+               spa_keystore_dsl_key_rele(rwa->os->os_spa, dck, FTAG);
+               dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
+               dsl_pool_config_exit(dp, FTAG);
+
+               ASSERT0(no_crypt);
+               if (err != 0) {
+                       abd_free(eabd);
+                       return (err);
+               }
+               /* Swap in the newly encrypted data into the abd */
+               abd_free(abd);
+               abd = eabd;
+
+               /*
+                * We want to prevent zio_rewrite() from trying to
+                * encrypt the data again
+                */
+               flags |= ZIO_FLAG_RAW_ENCRYPT;
+       }
+       rrd->abd = abd;
+
+       io = zio_rewrite(NULL, rwa->os->os_spa, bp->blk_birth, bp, abd,
+           BP_GET_PSIZE(bp), NULL, NULL, ZIO_PRIORITY_SYNC_WRITE, flags, &zb);
+
+       ASSERT(abd_get_size(abd) == BP_GET_LSIZE(bp) ||
+           abd_get_size(abd) == BP_GET_PSIZE(bp));
+
+       /* compute new bp checksum value and make sure it matches the old one */
+       zio_checksum_compute(io, BP_GET_CHECKSUM(bp), abd, abd_get_size(abd));
+       if (!ZIO_CHECKSUM_EQUAL(bp_cksum, io->io_bp->blk_cksum)) {
+               zio_destroy(io);
+               if (zfs_recv_best_effort_corrective != 0)
+                       return (0);
+               return (SET_ERROR(ECKSUM));
+       }
+
+       /* Correct the corruption in place */
+       err = zio_wait(io);
+       if (err == 0) {
+               cr_cb_data_t *cb_data =
+                   kmem_alloc(sizeof (cr_cb_data_t), KM_SLEEP);
+               cb_data->spa = rwa->os->os_spa;
+               cb_data->size = drrw->drr_logical_size;
+               cb_data->zb = zb;
+               /* Test if healing worked by re-reading the bp */
+               err = zio_wait(zio_read(rwa->heal_pio, rwa->os->os_spa, bp,
+                   abd_alloc_for_io(drrw->drr_logical_size, B_FALSE),
+                   drrw->drr_logical_size, corrective_read_done,
+                   cb_data, ZIO_PRIORITY_ASYNC_READ, flags, NULL));
+       }
+       if (err != 0 && zfs_recv_best_effort_corrective != 0)
+               err = 0;
+
+       return (err);
+}
+
 static int
 receive_read(dmu_recv_cookie_t *drc, int len, void *buf)
 {
@@ -2049,6 +2282,58 @@ receive_process_write_record(struct receive_writer_arg *rwa,
            !DMU_OT_IS_VALID(drrw->drr_type))
                return (SET_ERROR(EINVAL));
 
+       if (rwa->heal) {
+               blkptr_t *bp;
+               dmu_buf_t *dbp;
+               dnode_t *dn;
+               int flags = DB_RF_CANFAIL;
+
+               if (rwa->raw)
+                       flags |= DB_RF_NO_DECRYPT;
+
+               if (rwa->byteswap) {
+                       dmu_object_byteswap_t byteswap =
+                           DMU_OT_BYTESWAP(drrw->drr_type);
+                       dmu_ot_byteswap[byteswap].ob_func(abd_to_buf(rrd->abd),
+                           DRR_WRITE_PAYLOAD_SIZE(drrw));
+               }
+
+               err = dmu_buf_hold_noread(rwa->os, drrw->drr_object,
+                   drrw->drr_offset, FTAG, &dbp);
+               if (err != 0)
+                       return (err);
+
+               /* Try to read the object to see if it needs healing */
+               err = dbuf_read((dmu_buf_impl_t *)dbp, NULL, flags);
+               /*
+                * We only try to heal when dbuf_read() returns ECKSUM.
+                * Other errors (even EIO) get returned to caller.
+                * EIO indicates that the device is not present/accessible,
+                * so writing to it will likely fail.
+                * If the block is healthy, we don't want to overwrite it
+                * unnecessarily.
+                */
+               if (err != ECKSUM) {
+                       dmu_buf_rele(dbp, FTAG);
+                       return (err);
+               }
+               dn = dmu_buf_dnode_enter(dbp);
+               /* Make sure the on-disk block and recv record sizes match */
+               if (drrw->drr_logical_size !=
+                   dn->dn_datablkszsec << SPA_MINBLOCKSHIFT) {
+                       err = ENOTSUP;
+                       dmu_buf_dnode_exit(dbp);
+                       dmu_buf_rele(dbp, FTAG);
+                       return (err);
+               }
+               /* Get the block pointer for the corrupted block */
+               bp = dmu_buf_get_blkptr(dbp);
+               err = do_corrective_recv(rwa, drrw, rrd, bp);
+               dmu_buf_dnode_exit(dbp);
+               dmu_buf_rele(dbp, FTAG);
+               return (err);
+       }
+
        /*
         * For resuming to work, records must be in increasing order
         * by (object, offset).
@@ -2341,7 +2626,8 @@ dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc)
                rrw_exit(&ds->ds_bp_rwlock, FTAG);
                dsl_dataset_name(ds, name);
                dsl_dataset_disown(ds, dsflags, dmu_recv_tag);
-               (void) dsl_destroy_head(name);
+               if (!drc->drc_heal)
+                       (void) dsl_destroy_head(name);
        }
 }
 
@@ -2702,7 +2988,19 @@ receive_process_record(struct receive_writer_arg *rwa,
        ASSERT3U(rrd->bytes_read, >=, rwa->bytes_read);
        rwa->bytes_read = rrd->bytes_read;
 
-       if (rrd->header.drr_type != DRR_WRITE) {
+       /* We can only heal write records; other ones get ignored */
+       if (rwa->heal && rrd->header.drr_type != DRR_WRITE) {
+               if (rrd->abd != NULL) {
+                       abd_free(rrd->abd);
+                       rrd->abd = NULL;
+               } else if (rrd->payload != NULL) {
+                       kmem_free(rrd->payload, rrd->payload_size);
+                       rrd->payload = NULL;
+               }
+               return (0);
+       }
+
+       if (!rwa->heal && rrd->header.drr_type != DRR_WRITE) {
                err = flush_write_batch(rwa);
                if (err != 0) {
                        if (rrd->abd != NULL) {
@@ -2737,9 +3035,16 @@ receive_process_record(struct receive_writer_arg *rwa,
        case DRR_WRITE:
        {
                err = receive_process_write_record(rwa, rrd);
-               if (err != EAGAIN) {
+               if (rwa->heal) {
+                       /*
+                        * If healing - always free the abd after processing
+                        */
+                       abd_free(rrd->abd);
+                       rrd->abd = NULL;
+               } else if (err != EAGAIN) {
                        /*
-                        * On success, receive_process_write_record() returns
+                        * On success, a non-healing
+                        * receive_process_write_record() returns
                         * EAGAIN to indicate that we do not want to free
                         * the rrd or arc_buf.
                         */
@@ -2830,8 +3135,9 @@ receive_writer_thread(void *arg)
                 * EAGAIN indicates that this record has been saved (on
                 * raw->write_batch), and will be used again, so we don't
                 * free it.
+                * When healing data we always need to free the record.
                 */
-               if (err != EAGAIN) {
+               if (err != EAGAIN || rwa->heal) {
                        if (rwa->err == 0)
                                rwa->err = err;
                        kmem_free(rrd, sizeof (*rrd));
@@ -2839,10 +3145,13 @@ receive_writer_thread(void *arg)
        }
        kmem_free(rrd, sizeof (*rrd));
 
-       int err = flush_write_batch(rwa);
-       if (rwa->err == 0)
-               rwa->err = err;
-
+       if (rwa->heal) {
+               zio_wait(rwa->heal_pio);
+       } else {
+               int err = flush_write_batch(rwa);
+               if (rwa->err == 0)
+                       rwa->err = err;
+       }
        mutex_enter(&rwa->mutex);
        rwa->done = B_TRUE;
        cv_signal(&rwa->cv);
@@ -2926,17 +3235,19 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, offset_t *voffp)
                if (err != 0)
                        goto out;
 
-               /*
-                * If this is a new dataset we set the key immediately.
-                * Otherwise we don't want to change the key until we
-                * are sure the rest of the receive succeeded so we stash
-                * the keynvl away until then.
-                */
-               err = dsl_crypto_recv_raw(spa_name(drc->drc_os->os_spa),
-                   drc->drc_ds->ds_object, drc->drc_fromsnapobj,
-                   drc->drc_drrb->drr_type, keynvl, drc->drc_newfs);
-               if (err != 0)
-                       goto out;
+               if (!drc->drc_heal) {
+                       /*
+                        * If this is a new dataset we set the key immediately.
+                        * Otherwise we don't want to change the key until we
+                        * are sure the rest of the receive succeeded so we
+                        * stash the keynvl away until then.
+                        */
+                       err = dsl_crypto_recv_raw(spa_name(drc->drc_os->os_spa),
+                           drc->drc_ds->ds_object, drc->drc_fromsnapobj,
+                           drc->drc_drrb->drr_type, keynvl, drc->drc_newfs);
+                       if (err != 0)
+                               goto out;
+               }
 
                /* see comment in dmu_recv_end_sync() */
                drc->drc_ivset_guid = 0;
@@ -2967,11 +3278,17 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, offset_t *voffp)
        mutex_init(&rwa->mutex, NULL, MUTEX_DEFAULT, NULL);
        rwa->os = drc->drc_os;
        rwa->byteswap = drc->drc_byteswap;
+       rwa->heal = drc->drc_heal;
+       rwa->tofs = drc->drc_tofs;
        rwa->resumable = drc->drc_resumable;
        rwa->raw = drc->drc_raw;
        rwa->spill = drc->drc_spill;
        rwa->full = (drc->drc_drr_begin->drr_u.drr_begin.drr_fromguid == 0);
        rwa->os->os_raw_receive = drc->drc_raw;
+       if (drc->drc_heal) {
+               rwa->heal_pio = zio_root(drc->drc_os->os_spa, NULL, NULL,
+                   ZIO_FLAG_GODFATHER);
+       }
        list_create(&rwa->write_batch, sizeof (struct receive_record_arg),
            offsetof(struct receive_record_arg, node.bqn_node));
 
@@ -3107,7 +3424,9 @@ dmu_recv_end_check(void *arg, dmu_tx_t *tx)
 
        ASSERT3P(drc->drc_ds->ds_owner, ==, dmu_recv_tag);
 
-       if (!drc->drc_newfs) {
+       if (drc->drc_heal) {
+               error = 0;
+       } else if (!drc->drc_newfs) {
                dsl_dataset_t *origin_head;
 
                error = dsl_dataset_hold(dp, drc->drc_tofs, FTAG, &origin_head);
@@ -3183,13 +3502,18 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
        dmu_recv_cookie_t *drc = arg;
        dsl_pool_t *dp = dmu_tx_pool(tx);
        boolean_t encrypted = drc->drc_ds->ds_dir->dd_crypto_obj != 0;
-       uint64_t newsnapobj;
+       uint64_t newsnapobj = 0;
 
        spa_history_log_internal_ds(drc->drc_ds, "finish receiving",
            tx, "snap=%s", drc->drc_tosnap);
        drc->drc_ds->ds_objset->os_raw_receive = B_FALSE;
 
-       if (!drc->drc_newfs) {
+       if (drc->drc_heal) {
+               if (drc->drc_keynvl != NULL) {
+                       nvlist_free(drc->drc_keynvl);
+                       drc->drc_keynvl = NULL;
+               }
+       } else if (!drc->drc_newfs) {
                dsl_dataset_t *origin_head;
 
                VERIFY0(dsl_dataset_hold(dp, drc->drc_tofs, FTAG,
@@ -3303,7 +3627,7 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
         * tunable is set, in which case we will leave the newly-generated
         * value.
         */
-       if (drc->drc_raw && drc->drc_ivset_guid != 0) {
+       if (!drc->drc_heal && drc->drc_raw && drc->drc_ivset_guid != 0) {
                dmu_object_zapify(dp->dp_meta_objset, newsnapobj,
                    DMU_OT_DSL_DATASET, tx);
                VERIFY0(zap_update(dp->dp_meta_objset, newsnapobj,
@@ -3370,7 +3694,7 @@ dmu_recv_end(dmu_recv_cookie_t *drc, void *owner)
        if (error != 0) {
                dmu_recv_cleanup_ds(drc);
                nvlist_free(drc->drc_keynvl);
-       } else {
+       } else if (!drc->drc_heal) {
                if (drc->drc_newfs) {
                        zvol_create_minor(drc->drc_tofs);
                }
@@ -3400,3 +3724,7 @@ ZFS_MODULE_PARAM(zfs_recv, zfs_recv_, queue_ff, INT, ZMOD_RW,
 
 ZFS_MODULE_PARAM(zfs_recv, zfs_recv_, write_batch_size, INT, ZMOD_RW,
        "Maximum amount of writes to batch into one transaction");
+
+ZFS_MODULE_PARAM(zfs_recv, zfs_recv_, best_effort_corrective, INT, ZMOD_RW,
+       "Ignore errors during corrective receive");
+/* END CSTYLED */
index 81e8209cde6b65bfe1433d317dc44357564f9369..b2b59af42947b3ba55b04468500e684216853ebc 100644 (file)
@@ -1315,6 +1315,9 @@ spa_activate(spa_t *spa, spa_mode_t mode)
        avl_create(&spa->spa_errlist_last,
            spa_error_entry_compare, sizeof (spa_error_entry_t),
            offsetof(spa_error_entry_t, se_avl));
+       avl_create(&spa->spa_errlist_healed,
+           spa_error_entry_compare, sizeof (spa_error_entry_t),
+           offsetof(spa_error_entry_t, se_avl));
 
        spa_activate_os(spa);
 
@@ -1425,6 +1428,7 @@ spa_deactivate(spa_t *spa)
        spa_errlog_drain(spa);
        avl_destroy(&spa->spa_errlist_scrub);
        avl_destroy(&spa->spa_errlist_last);
+       avl_destroy(&spa->spa_errlist_healed);
 
        spa_keystore_fini(&spa->spa_keystore);
 
index 95cf90983f041e4e07b7d0977f80d0a5993790e2..4572a6e56f0bc04515f1c301c249c4889bd19f2e 100644 (file)
@@ -22,6 +22,7 @@
  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2013, 2014, Delphix. All rights reserved.
  * Copyright (c) 2021, George Amanakis. All rights reserved.
+ * Copyright (c) 2019 Datto Inc.
  */
 
 /*
 #include <sys/dmu_objset.h>
 #include <sys/dbuf.h>
 
+#define        NAME_MAX_LEN 64
+
 /*
  * spa_upgrade_errlog_limit : A zfs module parameter that controls the number
- *             of on-disk error log entries that will be converted to the new
- *             format when enabling head_errlog. Defaults to 0 which converts
- *             all log entries.
+ *             of on-disk error log entries that will be converted to the new
+ *             format when enabling head_errlog. Defaults to 0 which converts
+ *             all log entries.
  */
 static uint32_t spa_upgrade_errlog_limit = 0;
 
@@ -511,6 +514,103 @@ get_errlist_size(spa_t *spa, avl_tree_t *tree)
 }
 #endif
 
+/*
+ * If a healed bookmark matches an entry in the error log we stash it in a tree
+ * so that we can later remove the related log entries in sync context.
+ */
+static void
+spa_add_healed_error(spa_t *spa, uint64_t obj, zbookmark_phys_t *healed_zb)
+{
+       char name[NAME_MAX_LEN];
+
+       if (obj == 0)
+               return;
+
+       bookmark_to_name(healed_zb, name, sizeof (name));
+       mutex_enter(&spa->spa_errlog_lock);
+       if (zap_contains(spa->spa_meta_objset, obj, name) == 0) {
+               /*
+                * Found an error matching healed zb, add zb to our
+                * tree of healed errors
+                */
+               avl_tree_t *tree = &spa->spa_errlist_healed;
+               spa_error_entry_t search;
+               spa_error_entry_t *new;
+               avl_index_t where;
+               search.se_bookmark = *healed_zb;
+               mutex_enter(&spa->spa_errlist_lock);
+               if (avl_find(tree, &search, &where) != NULL) {
+                       mutex_exit(&spa->spa_errlist_lock);
+                       mutex_exit(&spa->spa_errlog_lock);
+                       return;
+               }
+               new = kmem_zalloc(sizeof (spa_error_entry_t), KM_SLEEP);
+               new->se_bookmark = *healed_zb;
+               avl_insert(tree, new, where);
+               mutex_exit(&spa->spa_errlist_lock);
+       }
+       mutex_exit(&spa->spa_errlog_lock);
+}
+
+/*
+ * If this error exists in the given tree remove it.
+ */
+static void
+remove_error_from_list(spa_t *spa, avl_tree_t *t, const zbookmark_phys_t *zb)
+{
+       spa_error_entry_t search, *found;
+       avl_index_t where;
+
+       mutex_enter(&spa->spa_errlist_lock);
+       search.se_bookmark = *zb;
+       if ((found = avl_find(t, &search, &where)) != NULL) {
+               avl_remove(t, found);
+               kmem_free(found, sizeof (spa_error_entry_t));
+       }
+       mutex_exit(&spa->spa_errlist_lock);
+}
+
+
+/*
+ * Removes all of the recv healed errors from both on-disk error logs
+ */
+static void
+spa_remove_healed_errors(spa_t *spa, avl_tree_t *s, avl_tree_t *l, dmu_tx_t *tx)
+{
+       char name[NAME_MAX_LEN];
+       spa_error_entry_t *se;
+       void *cookie = NULL;
+
+       ASSERT(MUTEX_HELD(&spa->spa_errlog_lock));
+
+       while ((se = avl_destroy_nodes(&spa->spa_errlist_healed,
+           &cookie)) != NULL) {
+               remove_error_from_list(spa, s, &se->se_bookmark);
+               remove_error_from_list(spa, l, &se->se_bookmark);
+               bookmark_to_name(&se->se_bookmark, name, sizeof (name));
+               kmem_free(se, sizeof (spa_error_entry_t));
+               (void) zap_remove(spa->spa_meta_objset,
+                   spa->spa_errlog_last, name, tx);
+               (void) zap_remove(spa->spa_meta_objset,
+                   spa->spa_errlog_scrub, name, tx);
+       }
+}
+
+/*
+ * Stash away healed bookmarks to remove them from the on-disk error logs
+ * later in spa_remove_healed_errors().
+ */
+void
+spa_remove_error(spa_t *spa, zbookmark_phys_t *zb)
+{
+       char name[NAME_MAX_LEN];
+
+       bookmark_to_name(zb, name, sizeof (name));
+
+       spa_add_healed_error(spa, spa->spa_errlog_last, zb);
+       spa_add_healed_error(spa, spa->spa_errlog_scrub, zb);
+}
+
 /*
  * Return the number of errors currently in the error log.  This is actually the
  * sum of both the last log and the current log, since we don't know the union
@@ -887,7 +987,7 @@ void
 sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj, dmu_tx_t *tx)
 {
        spa_error_entry_t *se;
-       char buf[64];
+       char buf[NAME_MAX_LEN];
        void *cookie;
 
        if (avl_numnodes(t) == 0)
@@ -992,6 +1092,7 @@ spa_errlog_sync(spa_t *spa, uint64_t txg)
         */
        if (avl_numnodes(&spa->spa_errlist_scrub) == 0 &&
            avl_numnodes(&spa->spa_errlist_last) == 0 &&
+           avl_numnodes(&spa->spa_errlist_healed) == 0 &&
            !spa->spa_scrub_finished) {
                mutex_exit(&spa->spa_errlist_lock);
                return;
@@ -1006,6 +1107,11 @@ spa_errlog_sync(spa_t *spa, uint64_t txg)
 
        tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
 
+       /*
+        * Remove healed errors from the error lists before syncing them out.
+        */
+       spa_remove_healed_errors(spa, &last, &scrub, tx);
+
        /*
         * Sync out the current list of errors.
         */
index 571e555738ba93a4dfbf37e597978437199ab6f6..382975208b9747f077e4d78e91e6e3b21883e211 100644 (file)
@@ -4928,7 +4928,7 @@ static boolean_t zfs_ioc_recv_inject_err;
 static int
 zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
     nvlist_t *localprops, nvlist_t *hidden_args, boolean_t force,
-    boolean_t resumable, int input_fd,
+    boolean_t heal, boolean_t resumable, int input_fd,
     dmu_replay_record_t *begin_record, uint64_t *read_bytes,
     uint64_t *errflags, nvlist_t **errors)
 {
@@ -4953,7 +4953,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
                return (SET_ERROR(EBADF));
 
        noff = off = zfs_file_off(input_fp);
-       error = dmu_recv_begin(tofs, tosnap, begin_record, force,
+       error = dmu_recv_begin(tofs, tosnap, begin_record, force, heal,
            resumable, localprops, hidden_args, origin, &drc, input_fp,
            &off);
        if (error != 0)
@@ -5296,7 +5296,7 @@ zfs_ioc_recv(zfs_cmd_t *zc)
        begin_record.drr_u.drr_begin = zc->zc_begin_record;
 
        error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvdprops, localprops,
-           NULL, zc->zc_guid, B_FALSE, zc->zc_cookie, &begin_record,
+           NULL, zc->zc_guid, B_FALSE, B_FALSE, zc->zc_cookie, &begin_record,
            &zc->zc_cookie, &zc->zc_obj, &errors);
        nvlist_free(recvdprops);
        nvlist_free(localprops);
@@ -5329,6 +5329,7 @@ zfs_ioc_recv(zfs_cmd_t *zc)
  *     "begin_record" -> non-byteswapped dmu_replay_record_t
  *     "input_fd" -> file descriptor to read stream from (int32)
  *     (optional) "force" -> force flag (value ignored)
+ *     (optional) "heal" -> use send stream to heal data corruption
  *     (optional) "resumable" -> resumable flag (value ignored)
  *     (optional) "cleanup_fd" -> unused
  *     (optional) "action_handle" -> unused
@@ -5349,6 +5350,7 @@ static const zfs_ioc_key_t zfs_keys_recv_new[] = {
        {"begin_record",        DATA_TYPE_BYTE_ARRAY,   0},
        {"input_fd",            DATA_TYPE_INT32,        0},
        {"force",               DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
+       {"heal",                DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
        {"resumable",           DATA_TYPE_BOOLEAN,      ZK_OPTIONAL},
        {"cleanup_fd",          DATA_TYPE_INT32,        ZK_OPTIONAL},
        {"action_handle",       DATA_TYPE_UINT64,       ZK_OPTIONAL},
@@ -5369,6 +5371,7 @@ zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
        char *tosnap;
        char tofs[ZFS_MAX_DATASET_NAME_LEN];
        boolean_t force;
+       boolean_t heal;
        boolean_t resumable;
        uint64_t read_bytes = 0;
        uint64_t errflags = 0;
@@ -5398,6 +5401,7 @@ zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
        input_fd = fnvlist_lookup_int32(innvl, "input_fd");
 
        force = nvlist_exists(innvl, "force");
+       heal = nvlist_exists(innvl, "heal");
        resumable = nvlist_exists(innvl, "resumable");
 
        /* we still use "props" here for backwards compatibility */
@@ -5414,7 +5418,7 @@ zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
                return (error);
 
        error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvprops, localprops,
-           hidden_args, force, resumable, input_fd, begin_record,
+           hidden_args, force, heal, resumable, input_fd, begin_record,
            &read_bytes, &errflags, &errors);
 
        fnvlist_add_uint64(outnvl, "read_bytes", read_bytes);
index 3d1ac36d96da104bfb99fad4dd11d25059a56400..7b55450ca906b3373baba33f1ea6b160e67fa5c1 100644 (file)
@@ -882,7 +882,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
        return (zio);
 }
 
-static void
+void
 zio_destroy(zio_t *zio)
 {
        metaslab_trace_fini(&zio->io_alloc_list);
index a4ec27a368ac9f3da453a831df1af72aa70040e3..8055c51932bfe4eee015ce44c5783e226ce116f4 100644 (file)
@@ -241,7 +241,8 @@ tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_003_pos',
     'zfs_receive_from_encrypted', 'zfs_receive_to_encrypted',
     'zfs_receive_raw', 'zfs_receive_raw_incremental', 'zfs_receive_-e',
     'zfs_receive_raw_-d', 'zfs_receive_from_zstd', 'zfs_receive_new_props',
-    'zfs_receive_-wR-encrypted-mix']
+    'zfs_receive_-wR-encrypted-mix', 'zfs_receive_corrective',
+    'zfs_receive_compressed_corrective']
 tags = ['functional', 'cli_root', 'zfs_receive']
 
 [tests/functional/cli_root/zfs_rename]
index e84a00273cf8b315a664b974e265737138aa04bd..434cc863f36cddbe991810db7e20d7e20efdb3f2 100644 (file)
@@ -545,6 +545,7 @@ test_recv_new(const char *dataset, int fd)
        fnvlist_add_string(props, "org.openzfs:launch", "September 17th, 2013");
        fnvlist_add_nvlist(optional, "localprops", props);
        fnvlist_add_boolean(optional, "force");
+       fnvlist_add_boolean(optional, "heal");
        fnvlist_add_int32(optional, "cleanup_fd", cleanup_fd);
 
        /*
index 4c5b1121293e2d043b0567572dbc39f8c6f0301b..b13f66dc3e1be0c8a80f78b7d58729f92be63e1c 100644 (file)
@@ -766,6 +766,8 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
        functional/cli_root/zfs_receive/zfs_receive_raw.ksh \
        functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh \
        functional/cli_root/zfs_receive/zfs_receive_-wR-encrypted-mix.ksh \
+       functional/cli_root/zfs_receive/zfs_receive_corrective.ksh \
+       functional/cli_root/zfs_receive/zfs_receive_compressed_corrective.ksh \
        functional/cli_root/zfs_rename/cleanup.ksh \
        functional/cli_root/zfs_rename/setup.ksh \
        functional/cli_root/zfs_rename/zfs_rename_001_pos.ksh \
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_compressed_corrective.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_compressed_corrective.ksh
new file mode 100755 (executable)
index 0000000..7f8eb0b
--- /dev/null
@@ -0,0 +1,193 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2019 Datto, Inc. All rights reserved.
+# Copyright (c) 2022 Axcient.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# OpenZFS should be able to heal data using corrective recv when the send file
+#   was generated with the --compressed flag
+#
+# STRATEGY:
+# 0. Create a file, checksum the file to be corrupted then compare its checksum
+#    with the one obtained after healing under different testing scenarios:
+# 1. Test healing (aka corrective) recv from a full send file
+# 2. Test healing recv (aka heal recv) from an incremental send file
+# 3. Test healing recv when compression on-disk is off but source was compressed
+# 4. Test heal recv when compression on-disk is on but source was uncompressed
+# 5. Test heal recv when compression doesn't match between send file and on-disk
+# 6. Test healing recv of an encrypted dataset using an unencrypted send file
+# 7. Test healing recv (on an encrypted dataset) using a raw send file
+# 8. Test healing when specifying destination filesystem only (no snapshot)
+# 9. Test incremental recv after healing recv
+#
+
+verify_runnable "both"
+
+DISK=${DISKS%% *}
+
+backup=$TEST_BASE_DIR/backup
+raw_backup=$TEST_BASE_DIR/raw_backup
+ibackup=$TEST_BASE_DIR/ibackup
+unc_backup=$TEST_BASE_DIR/unc_backup
+
+function cleanup
+{
+       log_must rm -f $backup $raw_backup $ibackup $unc_backup
+
+       poolexists $TESTPOOL && destroy_pool $TESTPOOL
+       log_must zpool create -f $TESTPOOL $DISK
+}
+
+function test_corrective_recv
+{
+       log_must zpool scrub -w $TESTPOOL
+       log_must zpool status -v $TESTPOOL
+       log_must eval "zpool status -v $TESTPOOL | \
+           grep \"Permanent errors have been detected\""
+
+       # make sure we will read the corruption from disk by flushing the ARC
+       log_must zinject -a
+
+       log_must eval "zfs recv -c $1 < $2"
+
+       log_must zpool scrub -w $TESTPOOL
+       log_must zpool status -v $TESTPOOL
+       log_mustnot eval "zpool status -v $TESTPOOL | \
+           grep \"Permanent errors have been detected\""
+       typeset cksum=$(md5digest $file)
+       [[ "$cksum" == "$checksum" ]] || \
+               log_fail "Checksums differ ($cksum != $checksum)"
+}
+
+log_onexit cleanup
+
+log_assert "ZFS corrective receive should be able to heal data corruption"
+
+typeset passphrase="password"
+typeset file="/$TESTPOOL/$TESTFS1/$TESTFILE0"
+
+log_must eval "poolexists $TESTPOOL && destroy_pool $TESTPOOL"
+log_must zpool create -f -o feature@head_errlog=disabled $TESTPOOL $DISK
+
+log_must eval "echo $passphrase > /$TESTPOOL/pwd"
+
+log_must zfs create -o primarycache=none \
+    -o atime=off -o compression=lz4 $TESTPOOL/$TESTFS1
+
+log_must dd if=/dev/urandom of=$file bs=1024 count=1024 oflag=sync
+log_must eval "echo 'aaaaaaaa' >> "$file
+typeset checksum=$(md5digest $file)
+
+log_must zfs snapshot $TESTPOOL/$TESTFS1@snap1
+
+# create full send file
+log_must eval "zfs send --compressed $TESTPOOL/$TESTFS1@snap1 > $backup"
+
+log_must dd if=/dev/urandom of=$file"1" bs=1024 count=1024 oflag=sync
+log_must eval "echo 'bbbbbbbb' >> "$file"1"
+log_must zfs snapshot $TESTPOOL/$TESTFS1@snap2
+# create incremental send file
+log_must eval "zfs send -c -i $TESTPOOL/$TESTFS1@snap1 \
+    $TESTPOOL/$TESTFS1@snap2 > $ibackup"
+
+corrupt_blocks_at_level $file 0
+# test healing recv from a full send file
+test_corrective_recv $TESTPOOL/$TESTFS1@snap1 $backup
+
+corrupt_blocks_at_level $file"1" 0
+# test healing recv from an incremental send file
+test_corrective_recv $TESTPOOL/$TESTFS1@snap2 $ibackup
+
+# create new uncompressed dataset using our send file
+log_must eval "zfs recv -o compression=off -o primarycache=none \
+    $TESTPOOL/$TESTFS2 < $backup"
+typeset compr=$(get_prop compression $TESTPOOL/$TESTFS2)
+[[ "$compr" == "off" ]] || \
+       log_fail "Unexpected compression $compr in recved dataset"
+corrupt_blocks_at_level "/$TESTPOOL/$TESTFS2/$TESTFILE0" 0
+# test healing recv when compression on-disk is off but source was compressed
+test_corrective_recv "$TESTPOOL/$TESTFS2@snap1" $backup
+
+# create a full sendfile from an uncompressed source
+log_must eval "zfs send --compressed $TESTPOOL/$TESTFS2@snap1 > $unc_backup"
+log_must eval "zfs recv -o compression=gzip -o primarycache=none \
+    $TESTPOOL/testfs3 < $unc_backup"
+typeset compr=$(get_prop compression $TESTPOOL/testfs3)
+[[ "$compr" == "gzip" ]] || \
+       log_fail "Unexpected compression $compr in recved dataset"
+corrupt_blocks_at_level "/$TESTPOOL/testfs3/$TESTFILE0" 0
+# test healing recv when compression on-disk is on but source was uncompressed
+test_corrective_recv "$TESTPOOL/testfs3@snap1" $unc_backup
+
+# create new compressed dataset using our send file
+log_must eval "zfs recv -o compression=gzip -o primarycache=none \
+    $TESTPOOL/testfs4 < $backup"
+typeset compr=$(get_prop compression $TESTPOOL/testfs4)
+[[ "$compr" == "gzip" ]] || \
+       log_fail "Unexpected compression $compr in recved dataset"
+corrupt_blocks_at_level "/$TESTPOOL/testfs4/$TESTFILE0" 0
+# test healing recv when compression doesn't match between send file and on-disk
+test_corrective_recv "$TESTPOOL/testfs4@snap1" $backup
+
+# create new encrypted (and compressed) dataset using our send file
+log_must eval "zfs recv -o encryption=aes-256-ccm -o keyformat=passphrase \
+    -o keylocation=file:///$TESTPOOL/pwd -o primarycache=none \
+    $TESTPOOL/testfs5 < $backup"
+typeset encr=$(get_prop encryption $TESTPOOL/testfs5)
+[[ "$encr" == "aes-256-ccm" ]] || \
+       log_fail "Unexpected encryption $encr in recved dataset"
+log_must eval "zfs send --raw $TESTPOOL/testfs5@snap1 > $raw_backup"
+log_must eval "zfs send --compressed $TESTPOOL/testfs5@snap1 > $backup"
+corrupt_blocks_at_level "/$TESTPOOL/testfs5/$TESTFILE0" 0
+# test healing recv of an encrypted dataset using an unencrypted send file
+test_corrective_recv "$TESTPOOL/testfs5@snap1" $backup
+corrupt_blocks_at_level "/$TESTPOOL/testfs5/$TESTFILE0" 0
+log_must zfs unmount $TESTPOOL/testfs5
+log_must zfs unload-key $TESTPOOL/testfs5
+# test healing recv (on an encrypted dataset) using a raw send file
+test_corrective_recv "$TESTPOOL/testfs5@snap1" $raw_backup
+# a non-raw send file healing an encrypted dataset with an unloaded key will fail
+log_mustnot eval "zfs recv -c $TESTPOOL/testfs5@snap1 < $backup"
+
+log_must zfs rollback -r $TESTPOOL/$TESTFS1@snap1
+corrupt_blocks_at_level $file 0
+# test healing when specifying destination filesystem only (no snapshot)
+test_corrective_recv $TESTPOOL/$TESTFS1 $backup
+# test incremental recv after healing recv
+log_must eval "zfs recv $TESTPOOL/$TESTFS1 < $ibackup"
+
+# test that healing recv can not be combined with incompatible recv options
+log_mustnot eval "zfs recv -h -c $TESTPOOL/$TESTFS1@snap1 < $backup"
+log_mustnot eval "zfs recv -F -c $TESTPOOL/$TESTFS1@snap1 < $backup"
+log_mustnot eval "zfs recv -s -c $TESTPOOL/$TESTFS1@snap1 < $backup"
+log_mustnot eval "zfs recv -u -c $TESTPOOL/$TESTFS1@snap1 < $backup"
+log_mustnot eval "zfs recv -d -c $TESTPOOL/$TESTFS1@snap1 < $backup"
+log_mustnot eval "zfs recv -e -c $TESTPOOL/$TESTFS1@snap1 < $backup"
+
+# ensure healing recv doesn't work when snap GUIDS don't match
+log_mustnot eval "zfs recv -c $TESTPOOL/testfs5@snap2 < $backup"
+log_mustnot eval "zfs recv -c $TESTPOOL/testfs5 < $backup"
+
+# test that healing recv doesn't work on non-existing snapshots
+log_mustnot eval "zfs recv -c $TESTPOOL/$TESTFS1@missing < $backup"
+
+log_pass "OpenZFS corrective recv works for data healing"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_corrective.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_corrective.ksh
new file mode 100755 (executable)
index 0000000..b2bbdf2
--- /dev/null
@@ -0,0 +1,192 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2019 Datto, Inc. All rights reserved.
+# Copyright (c) 2022 Axcient.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# OpenZFS should be able to heal data using corrective recv
+#
+# STRATEGY:
+# 0. Create a file, checksum the file to be corrupted then compare its checksum
+#    with the one obtained after healing under different testing scenarios:
+# 1. Test healing (aka corrective) recv from a full send file
+# 2. Test healing recv (aka heal recv) from an incremental send file
+# 3. Test healing recv when compression on-disk is off but source was compressed
+# 4. Test heal recv when compression on-disk is on but source was uncompressed
+# 5. Test heal recv when compression doesn't match between send file and on-disk
+# 6. Test healing recv of an encrypted dataset using an unencrypted send file
+# 7. Test healing recv (on an encrypted dataset) using a raw send file
+# 8. Test healing when specifying destination filesystem only (no snapshot)
+# 9. Test incremental recv after healing recv
+#
+
+verify_runnable "both"
+
+DISK=${DISKS%% *}
+
+backup=$TEST_BASE_DIR/backup
+raw_backup=$TEST_BASE_DIR/raw_backup
+ibackup=$TEST_BASE_DIR/ibackup
+unc_backup=$TEST_BASE_DIR/unc_backup
+
+function cleanup
+{
+       log_must rm -f $backup $raw_backup $ibackup $unc_backup
+
+       poolexists $TESTPOOL && destroy_pool $TESTPOOL
+       log_must zpool create -f $TESTPOOL $DISK
+}
+
+function test_corrective_recv
+{
+       log_must zpool scrub -w $TESTPOOL
+       log_must zpool status -v $TESTPOOL
+       log_must eval "zpool status -v $TESTPOOL | \
+           grep \"Permanent errors have been detected\""
+
+       # make sure we will read the corruption from disk by flushing the ARC
+       log_must zinject -a
+
+       log_must eval "zfs recv -c $1 < $2"
+
+       log_must zpool scrub -w $TESTPOOL
+       log_must zpool status -v $TESTPOOL
+       log_mustnot eval "zpool status -v $TESTPOOL | \
+           grep \"Permanent errors have been detected\""
+       typeset cksum=$(md5digest $file)
+       [[ "$cksum" == "$checksum" ]] || \
+               log_fail "Checksums differ ($cksum != $checksum)"
+}
+
+log_onexit cleanup
+
+log_assert "ZFS corrective receive should be able to heal data corruption"
+
+typeset passphrase="password"
+typeset file="/$TESTPOOL/$TESTFS1/$TESTFILE0"
+
+log_must eval "poolexists $TESTPOOL && destroy_pool $TESTPOOL"
+log_must zpool create -f -o feature@head_errlog=disabled $TESTPOOL $DISK
+
+log_must eval "echo $passphrase > /$TESTPOOL/pwd"
+
+log_must zfs create -o primarycache=none \
+    -o atime=off -o compression=lz4 $TESTPOOL/$TESTFS1
+
+log_must dd if=/dev/urandom of=$file bs=1024 count=1024 oflag=sync
+log_must eval "echo 'aaaaaaaa' >> "$file
+typeset checksum=$(md5digest $file)
+
+log_must zfs snapshot $TESTPOOL/$TESTFS1@snap1
+
+# create full send file
+log_must eval "zfs send $TESTPOOL/$TESTFS1@snap1 > $backup"
+
+log_must dd if=/dev/urandom of=$file"1" bs=1024 count=1024 oflag=sync
+log_must eval "echo 'bbbbbbbb' >> "$file"1"
+log_must zfs snapshot $TESTPOOL/$TESTFS1@snap2
+# create incremental send file
+log_must eval "zfs send -i $TESTPOOL/$TESTFS1@snap1 \
+    $TESTPOOL/$TESTFS1@snap2 > $ibackup"
+
+corrupt_blocks_at_level $file 0
+# test healing recv from a full send file
+test_corrective_recv $TESTPOOL/$TESTFS1@snap1 $backup
+
+corrupt_blocks_at_level $file"1" 0
+# test healing recv from an incremental send file
+test_corrective_recv $TESTPOOL/$TESTFS1@snap2 $ibackup
+
+# create new uncompressed dataset using our send file
+log_must eval "zfs recv -o compression=off -o primarycache=none \
+    $TESTPOOL/$TESTFS2 < $backup"
+typeset compr=$(get_prop compression $TESTPOOL/$TESTFS2)
+[[ "$compr" == "off" ]] || \
+       log_fail "Unexpected compression $compr in recved dataset"
+corrupt_blocks_at_level "/$TESTPOOL/$TESTFS2/$TESTFILE0" 0
+# test healing recv when compression on-disk is off but source was compressed
+test_corrective_recv "$TESTPOOL/$TESTFS2@snap1" $backup
+
+# create a full sendfile from an uncompressed source
+log_must eval "zfs send $TESTPOOL/$TESTFS2@snap1 > $unc_backup"
+log_must eval "zfs recv -o compression=gzip -o primarycache=none \
+    $TESTPOOL/testfs3 < $unc_backup"
+typeset compr=$(get_prop compression $TESTPOOL/testfs3)
+[[ "$compr" == "gzip" ]] || \
+       log_fail "Unexpected compression $compr in recved dataset"
+corrupt_blocks_at_level "/$TESTPOOL/testfs3/$TESTFILE0" 0
+# test healing recv when compression on-disk is on but source was uncompressed
+test_corrective_recv "$TESTPOOL/testfs3@snap1" $unc_backup
+
+# create new compressed dataset using our send file
+log_must eval "zfs recv -o compression=gzip -o primarycache=none \
+    $TESTPOOL/testfs4 < $backup"
+typeset compr=$(get_prop compression $TESTPOOL/testfs4)
+[[ "$compr" == "gzip" ]] || \
+       log_fail "Unexpected compression $compr in recved dataset"
+corrupt_blocks_at_level "/$TESTPOOL/testfs4/$TESTFILE0" 0
+# test healing recv when compression doesn't match between send file and on-disk
+test_corrective_recv "$TESTPOOL/testfs4@snap1" $backup
+
+# create new encrypted (and compressed) dataset using our send file
+log_must eval "zfs recv -o encryption=aes-256-ccm -o keyformat=passphrase \
+    -o keylocation=file:///$TESTPOOL/pwd -o primarycache=none \
+    $TESTPOOL/testfs5 < $backup"
+typeset encr=$(get_prop encryption $TESTPOOL/testfs5)
+[[ "$encr" == "aes-256-ccm" ]] || \
+       log_fail "Unexpected encryption $encr in recved dataset"
+log_must eval "zfs send --raw $TESTPOOL/testfs5@snap1 > $raw_backup"
+log_must eval "zfs send $TESTPOOL/testfs5@snap1 > $backup"
+corrupt_blocks_at_level "/$TESTPOOL/testfs5/$TESTFILE0" 0
+# test healing recv of an encrypted dataset using an unencrypted send file
+test_corrective_recv "$TESTPOOL/testfs5@snap1" $backup
+corrupt_blocks_at_level "/$TESTPOOL/testfs5/$TESTFILE0" 0
+log_must zfs unmount $TESTPOOL/testfs5
+log_must zfs unload-key $TESTPOOL/testfs5
+# test healing recv (on an encrypted dataset) using a raw send file
+test_corrective_recv "$TESTPOOL/testfs5@snap1" $raw_backup
+# a non-raw send file healing an encrypted dataset with an unloaded key will fail
+log_mustnot eval "zfs recv -c $TESTPOOL/testfs5@snap1 < $backup"
+
+log_must zfs rollback -r $TESTPOOL/$TESTFS1@snap1
+corrupt_blocks_at_level $file 0
+# test healing when specifying destination filesystem only (no snapshot)
+test_corrective_recv $TESTPOOL/$TESTFS1 $backup
+# test incremental recv after healing recv
+log_must eval "zfs recv $TESTPOOL/$TESTFS1 < $ibackup"
+
+# test that healing recv can not be combined with incompatible recv options
+log_mustnot eval "zfs recv -h -c $TESTPOOL/$TESTFS1@snap1 < $backup"
+log_mustnot eval "zfs recv -F -c $TESTPOOL/$TESTFS1@snap1 < $backup"
+log_mustnot eval "zfs recv -s -c $TESTPOOL/$TESTFS1@snap1 < $backup"
+log_mustnot eval "zfs recv -u -c $TESTPOOL/$TESTFS1@snap1 < $backup"
+log_mustnot eval "zfs recv -d -c $TESTPOOL/$TESTFS1@snap1 < $backup"
+log_mustnot eval "zfs recv -e -c $TESTPOOL/$TESTFS1@snap1 < $backup"
+
+# ensure healing recv doesn't work when snap GUIDS don't match
+log_mustnot eval "zfs recv -c $TESTPOOL/testfs5@snap2 < $backup"
+log_mustnot eval "zfs recv -c $TESTPOOL/testfs5 < $backup"
+
+# test that healing recv doesn't work on non-existing snapshots
+log_mustnot eval "zfs recv -c $TESTPOOL/$TESTFS1@missing < $backup"
+
+log_pass "OpenZFS corrective recv works for data healing"